diff options
Diffstat (limited to 'contrib/gcc/config/i386/i386.md')
-rw-r--r-- | contrib/gcc/config/i386/i386.md | 5687 |
1 files changed, 3944 insertions, 1743 deletions
diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md index 36a0497..1fa2998 100644 --- a/contrib/gcc/config/i386/i386.md +++ b/contrib/gcc/config/i386/i386.md @@ -1,5 +1,6 @@ ;; GCC machine description for IA-32 and x86-64. -;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka @@ -49,55 +50,77 @@ ;; 'k' Likewise, print the SImode name of the register. ;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh. ;; 'y' Print "st(0)" instead of "st" as a register. -;; + ;; UNSPEC usage: -;; 0 This is a `scas' operation. The mode of the UNSPEC is always SImode. -;; operand 0 is the memory address to scan. -;; operand 1 is a register containing the value to scan for. The mode -;; of the scas opcode will be the same as the mode of this operand. -;; operand 2 is the known alignment of operand 0. -;; 1 This is a `sin' operation. The mode of the UNSPEC is MODE_FLOAT. -;; operand 0 is the argument for `sin'. -;; 2 This is a `cos' operation. The mode of the UNSPEC is MODE_FLOAT. -;; operand 0 is the argument for `cos'. -;; 3 This is part of a `stack probe' operation. The mode of the UNSPEC is -;; always SImode. operand 0 is the size of the stack allocation. -;; 4 This is the source of a fake SET of the frame pointer which is used to -;; prevent insns referencing it being scheduled across the initial -;; decrement of the stack pointer. -;; 5 This is a `bsf' operation. -;; 6 This is the @GOT offset of a PIC address. -;; 7 This is the @GOTOFF offset of a PIC address. -;; 8 This is a reference to a symbol's @PLT address. -;; 9 This is an `fnstsw' operation. -;; 10 This is a `sahf' operation. -;; 11 This is a `fstcw' operation -;; 12 This is behaviour of add when setting carry flag. -;; 13 This is a `eh_return' placeholder. 
- -;; For SSE/MMX support: -;; 30 This is `fix', guaranteed to be truncating. -;; 31 This is a `emms' operation. -;; 32 This is a `maskmov' operation. -;; 33 This is a `movmsk' operation. -;; 34 This is a `non-temporal' move. -;; 36 This is used to distinguish COMISS from UCOMISS. -;; 37 This is a `ldmxcsr' operation. -;; 38 This is a forced `movaps' instruction (rather than whatever movti does) -;; 39 This is a forced `movups' instruction (rather than whatever movti does) -;; 40 This is a `stmxcsr' operation. -;; 41 This is a `shuffle' operation. -;; 42 This is a `rcp' operation. -;; 43 This is a `rsqsrt' operation. -;; 44 This is a `sfence' operation. -;; 45 This is a noop to prevent excessive combiner cleverness. -;; 46 This is a `femms' operation. -;; 49 This is a 'pavgusb' operation. -;; 50 This is a `pfrcp' operation. -;; 51 This is a `pfrcpit1' operation. -;; 52 This is a `pfrcpit2' operation. -;; 53 This is a `pfrsqrt' operation. -;; 54 This is a `pfrsqrit1' operation. + +(define_constants + [; Relocation specifiers + (UNSPEC_GOT 0) + (UNSPEC_GOTOFF 1) + (UNSPEC_GOTPCREL 2) + (UNSPEC_GOTTPOFF 3) + (UNSPEC_TPOFF 4) + (UNSPEC_NTPOFF 5) + (UNSPEC_DTPOFF 6) + (UNSPEC_GOTNTPOFF 7) + (UNSPEC_INDNTPOFF 8) + + ; Prologue support + (UNSPEC_STACK_PROBE 10) + (UNSPEC_STACK_ALLOC 11) + (UNSPEC_SET_GOT 12) + (UNSPEC_SSE_PROLOGUE_SAVE 13) + + ; TLS support + (UNSPEC_TP 15) + (UNSPEC_TLS_GD 16) + (UNSPEC_TLS_LD_BASE 17) + + ; Other random patterns + (UNSPEC_SCAS 20) + (UNSPEC_SIN 21) + (UNSPEC_COS 22) + (UNSPEC_BSF 23) + (UNSPEC_FNSTSW 24) + (UNSPEC_SAHF 25) + (UNSPEC_FSTCW 26) + (UNSPEC_ADD_CARRY 27) + (UNSPEC_FLDCW 28) + + ; For SSE/MMX support: + (UNSPEC_FIX 30) + (UNSPEC_MASKMOV 32) + (UNSPEC_MOVMSK 33) + (UNSPEC_MOVNT 34) + (UNSPEC_MOVA 38) + (UNSPEC_MOVU 39) + (UNSPEC_SHUFFLE 41) + (UNSPEC_RCP 42) + (UNSPEC_RSQRT 43) + (UNSPEC_SFENCE 44) + (UNSPEC_NOP 45) ; prevents combiner cleverness + (UNSPEC_PAVGUSB 49) + (UNSPEC_PFRCP 50) + (UNSPEC_PFRCPIT1 51) + 
(UNSPEC_PFRCPIT2 52) + (UNSPEC_PFRSQRT 53) + (UNSPEC_PFRSQIT1 54) + (UNSPEC_PSHUFLW 55) + (UNSPEC_PSHUFHW 56) + (UNSPEC_MFENCE 59) + (UNSPEC_LFENCE 60) + (UNSPEC_PSADBW 61) + ]) + +(define_constants + [(UNSPECV_BLOCKAGE 0) + (UNSPECV_EH_RETURN 13) + (UNSPECV_EMMS 31) + (UNSPECV_LDMXCSR 37) + (UNSPECV_STMXCSR 40) + (UNSPECV_FEMMS 46) + (UNSPECV_CLFLUSH 57) + ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls ;; from i386.c. @@ -116,26 +139,44 @@ ;; A basic instruction type. Refinements due to arguments to be ;; provided in other attributes. (define_attr "type" - "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx,fistp" + "other,multi, + alu,alu1,negnot,imov,imovx,lea, + incdec,ishift,ishift1,rotate,rotate1,imul,idiv, + icmp,test,ibr,setcc,icmov, + push,pop,call,callv, + str,cld, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp, + sselog,sseiadd,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) ;; Main data type used by the insn -(define_attr "mode" "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI" +(define_attr "mode" + "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI,V4SF,V2DF,V2SF" (const_string "unknown")) -;; Set for i387 operations. -(define_attr "i387" "" - (if_then_else (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp") - (const_int 1) - (const_int 0))) +;; The CPU unit operations uses. 
+(define_attr "unit" "integer,i387,sse,mmx,unknown" + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp") + (const_string "i387") + (eq_attr "type" "sselog,sseiadd,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv") + (const_string "sse") + (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") + (const_string "mmx") + (eq_attr "type" "other") + (const_string "unknown")] + (const_string "integer"))) ;; The (bounding maximum) length of an instruction immediate. (define_attr "length_immediate" "" - (cond [(eq_attr "type" "incdec,setcc,icmov,ibr,str,cld,lea,other,multi,idiv,sse,mmx") + (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv") (const_int 0) - (eq_attr "i387" "1") + (eq_attr "unit" "i387,sse,mmx") (const_int 0) - (eq_attr "type" "alu1,negnot,alu,icmp,imovx,ishift,imul,push,pop") + (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1, + imul,icmp,push,pop") (symbol_ref "ix86_attr_length_immediate_default(insn,1)") (eq_attr "type" "imov,test") (symbol_ref "ix86_attr_length_immediate_default(insn,0)") @@ -147,22 +188,20 @@ (if_then_else (match_operand 1 "constant_call_address_operand" "") (const_int 4) (const_int 0)) + ;; We don't know the size before shorten_branches. Expect + ;; the instruction to fit for better scheduling. (eq_attr "type" "ibr") - (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) - (lt (minus (match_dup 0) (pc)) - (const_int 124))) - (const_int 1) - (const_int 4)) + (const_int 1) ] - (symbol_ref "/* Update immediate_length and other attributes! */ abort(),1"))) + (symbol_ref "/* Update immediate_length and other attributes! */ + abort(),1"))) ;; The (bounding maximum) length of an instruction address. 
(define_attr "length_address" "" (cond [(eq_attr "type" "str,cld,other,multi,fxch") (const_int 0) (and (eq_attr "type" "call") - (match_operand 1 "constant_call_address_operand" "")) + (match_operand 0 "constant_call_address_operand" "")) (const_int 0) (and (eq_attr "type" "callv") (match_operand 1 "constant_call_address_operand" "")) @@ -172,16 +211,25 @@ ;; Set when length prefix is used. (define_attr "prefix_data16" "" - (if_then_else (eq_attr "mode" "HI") + (if_then_else (ior (eq_attr "mode" "HI") + (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF"))) (const_int 1) (const_int 0))) ;; Set when string REP prefix is used. -(define_attr "prefix_rep" "" (const_int 0)) +(define_attr "prefix_rep" "" + (if_then_else (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF")) + (const_int 1) + (const_int 0))) ;; Set when 0f opcode prefix is used. (define_attr "prefix_0f" "" - (if_then_else (eq_attr "type" "imovx,setcc,icmov,sse,mmx") + (if_then_else + (eq_attr "type" + "imovx,setcc,icmov, + sselog,sseiadd,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") (const_int 1) (const_int 0))) @@ -189,7 +237,7 @@ (define_attr "modrm" "" (cond [(eq_attr "type" "str,cld") (const_int 0) - (eq_attr "i387" "1") + (eq_attr "unit" "i387") (const_int 0) (and (eq_attr "type" "incdec") (ior (match_operand:SI 1 "register_operand" "") @@ -205,6 +253,12 @@ (and (match_operand 0 "register_operand" "") (match_operand 1 "immediate_operand" ""))) (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) ] (const_int 1))) @@ -214,11 +268,15 @@ (define_attr "length" "" (cond [(eq_attr "type" "other,multi,fistp") (const_int 16) - ] + (eq_attr "type" "fcmp") + (const_int 4) + (eq_attr "unit" "i387") + (plus (const_int 2) + (plus (attr "prefix_data16") + (attr 
"length_address")))] (plus (plus (attr "modrm") (plus (attr "prefix_0f") - (plus (attr "i387") - (const_int 1)))) + (const_int 1))) (plus (attr "prefix_rep") (plus (attr "prefix_data16") (plus (attr "length_immediate") @@ -243,7 +301,7 @@ (if_then_else (match_operand 0 "memory_operand" "") (const_string "both") (const_string "load")) - (eq_attr "type" "icmp,test") + (eq_attr "type" "icmp,test,ssecmp,mmxcmp,fcmp") (if_then_else (ior (match_operand 0 "memory_operand" "") (match_operand 1 "memory_operand" "")) (const_string "load") @@ -270,7 +328,12 @@ (const_string "store") (match_operand 1 "memory_operand" "") (const_string "load") - (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx") + (and (eq_attr "type" + "!alu1,negnot, + imov,imovx,icmp,test, + fmov,fcmp,fsgn, + sse,ssemov,ssecmp,ssecvt, + mmx,mmxmov,mmxcmp,mmxcvt") (match_operand 2 "memory_operand" "")) (const_string "load") (and (eq_attr "type" "icmov") @@ -284,11 +347,11 @@ (define_attr "imm_disp" "false,true,unknown" (cond [(eq_attr "type" "other,multi") (const_string "unknown") - (and (eq_attr "type" "icmp,test,imov") + (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1") (and (match_operand 0 "memory_displacement_operand" "") (match_operand 1 "immediate_operand" ""))) (const_string "true") - (and (eq_attr "type" "alu,ishift,imul,idiv") + (and (eq_attr "type" "alu,ishift,rotate,imul,idiv") (and (match_operand 0 "memory_displacement_operand" "") (match_operand 2 "immediate_operand" ""))) (const_string "true") @@ -305,710 +368,10 @@ [(set_attr "length" "128") (set_attr "type" "multi")]) -;; Pentium Scheduling -;; -;; The Pentium is an in-order core with two integer pipelines. - -;; True for insns that behave like prefixed insns on the Pentium. 
-(define_attr "pent_prefix" "false,true" - (if_then_else (ior (eq_attr "prefix_0f" "1") - (ior (eq_attr "prefix_data16" "1") - (eq_attr "prefix_rep" "1"))) - (const_string "true") - (const_string "false"))) - -;; Categorize how an instruction slots. - -;; The non-MMX Pentium slots an instruction with prefixes on U pipe only, -;; while MMX Pentium can slot it on either U or V. Model non-MMX Pentium -;; rules, because it results in noticeably better code on non-MMX Pentium -;; and doesn't hurt much on MMX. (Prefixed instructions are not very -;; common, so the scheduler usualy has a non-prefixed insn to pair). - -(define_attr "pent_pair" "uv,pu,pv,np" - (cond [(eq_attr "imm_disp" "true") - (const_string "np") - (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec") - (and (eq_attr "type" "pop,push") - (eq_attr "memory" "!both"))) - (if_then_else (eq_attr "pent_prefix" "true") - (const_string "pu") - (const_string "uv")) - (eq_attr "type" "ibr") - (const_string "pv") - (and (eq_attr "type" "ishift") - (match_operand 2 "const_int_operand" "")) - (const_string "pu") - (and (eq_attr "type" "call") - (match_operand 0 "constant_call_address_operand" "")) - (const_string "pv") - (and (eq_attr "type" "callv") - (match_operand 1 "constant_call_address_operand" "")) - (const_string "pv") - ] - (const_string "np"))) - -;; Rough readiness numbers. Fine tuning happens in i386.c. -;; -;; u describes pipe U -;; v describes pipe V -;; uv describes either pipe U or V for those that can issue to either -;; np describes not paring -;; fpu describes fpu -;; fpm describes fp insns of different types are not pipelined. -;; -;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "imul")) - 11 11) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "imul")) - 11 11) - -;; Rep movs takes minimally 12 cycles. 
-(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "str")) - 12 12) - -; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22 -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "idiv")) - 46 46) - -; Fp reg-reg moves takes 1 cycle. Loads takes 1 cycle for SF/DF mode, -; 3 cycles for XFmode. Stores takes 2 cycles for SF/DF and 3 for XF. -; fldz and fld1 takes 2 cycles. Only reg-reg moves are pairable. -; The integer <-> fp conversion is not modeled correctly. Fild behaves -; like normal fp operation and fist takes 6 cycles. - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF")))) - 3 3) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF")))) - 3 3) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (ior (match_operand 1 "immediate_operand" "") - (eq_attr "memory" "store")))) - 2 2) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (ior (match_operand 1 "immediate_operand" "") - (eq_attr "memory" "store")))) - 2 2) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "cld")) - 2 2) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "fmov") - (eq_attr "memory" "none,load"))) - 1 1) - -; Read/Modify/Write instructions usually take 3 cycles. 
-(define_function_unit "pent_u" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "pu") - (eq_attr "memory" "both")))) - 3 3) - -(define_function_unit "pent_uv" 2 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "both")))) - 3 3) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,negnot,ishift") - (and (eq_attr "pent_pair" "np") - (eq_attr "memory" "both")))) - 3 3) - -; Read/Modify or Modify/Write instructions usually take 2 cycles. -(define_function_unit "pent_u" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,ishift") - (and (eq_attr "pent_pair" "pu") - (eq_attr "memory" "load,store")))) - 2 2) - -(define_function_unit "pent_uv" 2 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "load,store")))) - 2 2) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,ishift") - (and (eq_attr "pent_pair" "np") - (eq_attr "memory" "load,store")))) - 2 2) - -; Insns w/o memory operands and move instructions usually take one cycle. -(define_function_unit "pent_u" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "pu")) - 1 1) - -(define_function_unit "pent_v" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "pv")) - 1 1) - -(define_function_unit "pent_uv" 2 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "!np")) - 1 1) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "np")) - 1 1) - -; Pairable insns only conflict with other non-pairable insns. 
-(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "both")))) - 3 3 - [(eq_attr "pent_pair" "np")]) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (and (eq_attr "type" "alu,alu1,ishift") - (and (eq_attr "pent_pair" "!np") - (eq_attr "memory" "load,store")))) - 2 2 - [(eq_attr "pent_pair" "np")]) - -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "pent_pair" "!np")) - 1 1 - [(eq_attr "pent_pair" "np")]) - -; Floating point instructions usually blocks cycle longer when combined with -; integer instructions, because of the inpaired fxch instruction. -(define_function_unit "pent_np" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")) - 2 2 - [(eq_attr "type" "!fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")]) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fcmp,fxch,fsgn")) - 1 1) - -; Addition takes 3 cycles; assume other random cruft does as well. -; ??? Trivial fp operations such as fabs or fchs takes only one cycle. -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fop,fop1,fistp")) - 3 1) - -; Multiplication takes 3 cycles and is only half pipelined. -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fmul")) - 3 1) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fmul")) - 2 2) - -; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles. -; They can overlap with integer insns. Only the last two cycles can overlap -; with other fp insns. Only fsin/fcos can overlap with multiplies. -; Only last two cycles of fsin/fcos can overlap with other instructions. 
-(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fdiv")) - 39 37) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fdiv")) - 39 39) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fpspc")) - 70 68) - -(define_function_unit "pent_mul" 1 1 - (and (eq_attr "cpu" "pentium") - (eq_attr "type" "fpspc")) - 70 70) - -;; Pentium Pro/PII Scheduling -;; -;; The PPro has an out-of-order core, but the instruction decoders are -;; naturally in-order and asymmetric. We get best performance by scheduling -;; for the decoders, for in doing so we give the oo execution unit the -;; most choices. - -;; Categorize how many uops an ia32 instruction evaluates to: -;; one -- an instruction with 1 uop can be decoded by any of the -;; three decoders. -;; few -- an instruction with 1 to 4 uops can be decoded only by -;; decoder 0. -;; many -- a complex instruction may take an unspecified number of -;; cycles to decode in decoder 0. - -(define_attr "ppro_uops" "one,few,many" - (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str") - (const_string "many") - (eq_attr "type" "icmov,fcmov,str,cld") - (const_string "few") - (eq_attr "type" "imov") - (if_then_else (eq_attr "memory" "store,both") - (const_string "few") - (const_string "one")) - (eq_attr "memory" "!none") - (const_string "few") - ] - (const_string "one"))) - -;; Rough readiness numbers. Fine tuning happens in i386.c. -;; -;; p0 describes port 0. -;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either. -;; p2 describes port 2 for loads. -;; p34 describes ports 3 and 4 for stores. -;; fpu describes the fpu accessed via port 0. -;; ??? It is less than clear if there are separate fadd and fmul units -;; that could operate in parallel. -;; -;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. 
- -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "ishift,lea,ibr,cld")) - 1 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "imul")) - 4 1) - -;; ??? Does the divider lock out the pipe while it works, -;; or is there a disconnected unit? -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "idiv")) - 17 17) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fop,fop1,fsgn,fistp")) - 3 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fcmov")) - 2 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fcmp")) - 1 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fmov")) - 1 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fmul")) - 5 1) - -(define_function_unit "ppro_p0" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fdiv,fpspc")) - 56 1) - -(define_function_unit "ppro_p01" 2 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "!imov,fmov")) - 1 1) - -(define_function_unit "ppro_p01" 2 0 - (and (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "imov,fmov")) - (eq_attr "memory" "none")) - 1 1) - -(define_function_unit "ppro_p2" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (ior (eq_attr "type" "pop") - (eq_attr "memory" "load,both"))) - 3 1) - -(define_function_unit "ppro_p34" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (ior (eq_attr "type" "push") - (eq_attr "memory" "store,both"))) - 1 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fop,fop1,fsgn,fmov,fcmp,fcmov,fistp")) - 1 1) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "fmul")) - 5 2) - -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - 
(eq_attr "type" "fdiv,fpspc")) - 56 56) - -;; imul uses the fpu. ??? does it have the same throughput as fmul? -(define_function_unit "fpu" 1 0 - (and (eq_attr "cpu" "pentiumpro") - (eq_attr "type" "imul")) - 4 1) - -;; AMD K6/K6-2 Scheduling -;; -;; The K6 has similar architecture to PPro. Important difference is, that -;; there are only two decoders and they seems to be much slower than execution -;; units. So we have to pay much more attention to proper decoding for -;; schedulers. We share most of scheduler code for PPro in i386.c -;; -;; The fp unit is not pipelined and do one operation per two cycles including -;; the FXCH. -;; -;; alu describes both ALU units (ALU-X and ALU-Y). -;; alux describes X alu unit -;; fpu describes FPU unit -;; load describes load unit. -;; branch describes branch unit. -;; store decsribes store unit. This unit is not modelled completely and only -;; used to model lea operation. Otherwise it lie outside of the critical -;; path. -;; -;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. - -;; The decoder specification is in the PPro section above! - -;; Shift instructions and certain arithmetic are issued only to X pipe. -(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "ishift,alu1,negnot,cld")) - 1 1) - -;; The QI mode arithmetic is issued to X pipe only. 
-(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec") - (match_operand:QI 0 "general_operand" ""))) - 1 1) - -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "ishift,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea")) - 1 1) - -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "imov") - (eq_attr "memory" "none"))) - 1 1) - -(define_function_unit "k6_branch" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "call,callv,ibr")) - 1 1) - -;; Load unit have two cycle latency, but we take care for it in adjust_cost -(define_function_unit "k6_load" 1 0 - (and (eq_attr "cpu" "k6") - (ior (eq_attr "type" "pop") - (eq_attr "memory" "load,both"))) - 1 1) - -(define_function_unit "k6_load" 1 0 - (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "str") - (eq_attr "memory" "load,both"))) - 10 10) - -;; Lea have two instructions, so latency is probably 2 -(define_function_unit "k6_store" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "lea")) - 2 1) - -(define_function_unit "k6_store" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "str")) - 10 10) - -(define_function_unit "k6_store" 1 0 - (and (eq_attr "cpu" "k6") - (ior (eq_attr "type" "push") - (eq_attr "memory" "store,both"))) - 1 1) - -(define_function_unit "k6_fpu" 1 1 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "fop,fop1,fmov,fcmp,fistp")) - 2 2) - -(define_function_unit "k6_fpu" 1 1 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "fmul")) - 2 2) - -;; ??? Guess -(define_function_unit "k6_fpu" 1 1 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "fdiv,fpspc")) - 56 56) - -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "imul")) - 2 2) - -(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "imul")) - 2 2) - -;; ??? 
Guess -(define_function_unit "k6_alu" 2 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "idiv")) - 17 17) - -(define_function_unit "k6_alux" 1 0 - (and (eq_attr "cpu" "k6") - (eq_attr "type" "idiv")) - 17 17) - -;; AMD Athlon Scheduling -;; -;; The Athlon does contain three pipelined FP units, three integer units and -;; three address generation units. -;; -;; The predecode logic is determining boundaries of instructions in the 64 -;; byte cache line. So the cache line straddling problem of K6 might be issue -;; here as well, but it is not noted in the documentation. -;; -;; Three DirectPath instructions decoders and only one VectorPath decoder -;; is available. They can decode three DirectPath instructions or one VectorPath -;; instruction per cycle. -;; Decoded macro instructions are then passed to 72 entry instruction control -;; unit, that passes -;; it to the specialized integer (18 entry) and fp (36 entry) schedulers. -;; -;; The load/store queue unit is not attached to the schedulers but -;; communicates with all the execution units separately instead. 
- -(define_attr "athlon_decode" "direct,vector" - (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov") - (const_string "vector") - (and (eq_attr "type" "push") - (match_operand 1 "memory_operand" "")) - (const_string "vector") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load,store") - (eq_attr "mode" "XF"))) - (const_string "vector")] - (const_string "direct"))) - -(define_function_unit "athlon_vectordec" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "vector")) - 1 1) - -(define_function_unit "athlon_directdec" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "direct")) - 1 1) - -(define_function_unit "athlon_vectordec" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_decode" "direct")) - 1 1 [(eq_attr "athlon_decode" "vector")]) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ibr,call,callv,icmov,cld,pop,setcc,push,pop")) - 1 1) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "str")) - 15 15) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "imul")) - 5 0) - -(define_function_unit "athlon_ieu" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "idiv")) - 42 0) - -(define_function_unit "athlon_muldiv" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "imul")) - 5 0) - -(define_function_unit "athlon_muldiv" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "idiv")) - 42 42) - -(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any" - (cond [(eq_attr "type" "fop,fop1,fcmp,fistp") - (const_string "add") - (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov") - (const_string "mul") - (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both")) - (const_string "store") - (and (eq_attr "type" "fmov") (eq_attr "memory" "load")) - (const_string "any") - (and (eq_attr "type" "fmov") - (ior 
(match_operand:SI 1 "register_operand" "") - (match_operand 1 "immediate_operand" ""))) - (const_string "store") - (eq_attr "type" "fmov") - (const_string "muladd")] - (const_string "none"))) - -;; We use latencies 1 for definitions. This is OK to model colisions -;; in execution units. The real latencies are modeled in the "fp" pipeline. - -;; fsin, fcos: 96-192 -;; fsincos: 107-211 -;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode. -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fpspc")) - 100 1) - -;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode. -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fdiv")) - 24 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fop,fop1,fmul,fistp")) - 4 1) - -;; XFmode loads are slow. -;; XFmode store is slow too (8 cycles), but we don't need to model it, because -;; there are no dependent instructions. - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fmov") - (and (eq_attr "memory" "load") - (eq_attr "mode" "XF")))) - 10 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fmov,fsgn")) - 2 1) - -;; fcmp and ftst instructions -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fcmp") - (eq_attr "athlon_decode" "direct"))) - 3 1) - -;; fcmpi instructions. 
-(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (and (eq_attr "type" "fcmp") - (eq_attr "athlon_decode" "vector"))) - 3 1) - -(define_function_unit "athlon_fp" 3 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "type" "fcmov")) - 7 1) - -(define_function_unit "athlon_fp_mul" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "mul")) - 1 1) - -(define_function_unit "athlon_fp_add" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "add")) - 1 1) - -(define_function_unit "athlon_fp_muladd" 2 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "muladd,mul,add")) - 1 1) - -(define_function_unit "athlon_fp_store" 1 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "athlon_fpunits" "store")) - 1 1) - -;; We don't need to model the Address Generation Unit, since we don't model -;; the re-order buffer yet and thus we never schedule more than three operations -;; at time. Later we may want to experiment with MD_SCHED macros modeling the -;; decoders independently on the functional units. - -;(define_function_unit "athlon_agu" 3 0 -; (and (eq_attr "cpu" "athlon") -; (and (eq_attr "memory" "!none") -; (eq_attr "athlon_fpunits" "none"))) -; 1 1) - -;; Model load unit to avoid too long sequences of loads. We don't need to -;; model store queue, since it is hardly going to be bottleneck. - -(define_function_unit "athlon_load" 2 0 - (and (eq_attr "cpu" "athlon") - (eq_attr "memory" "load,both")) - 1 1) - +(include "pentium.md") +(include "ppro.md") +(include "k6.md") +(include "athlon.md") ;; Compare instructions. 
@@ -1382,7 +745,8 @@ [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(compare:CCFP (match_operand 1 "register_operand" "f") - (match_operand 2 "const0_operand" "X"))] 9))] + (match_operand 2 "const0_operand" "X"))] + UNSPEC_FNSTSW))] "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" @@ -1413,7 +777,8 @@ (unspec:HI [(compare:CCFP (match_operand:SF 1 "register_operand" "f") - (match_operand:SF 2 "nonimmediate_operand" "fm"))] 9))] + (match_operand:SF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "fcmp") @@ -1434,7 +799,8 @@ (unspec:HI [(compare:CCFP (match_operand:DF 1 "register_operand" "f") - (match_operand:DF 2 "nonimmediate_operand" "fm"))] 9))] + (match_operand:DF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "multi") @@ -1465,7 +831,8 @@ (unspec:HI [(compare:CCFP (match_operand:XF 1 "register_operand" "f") - (match_operand:XF 2 "register_operand" "f"))] 9))] + (match_operand:XF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] "!TARGET_64BIT && TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "multi") @@ -1476,7 +843,8 @@ (unspec:HI [(compare:CCFP (match_operand:TF 1 "register_operand" "f") - (match_operand:TF 2 "register_operand" "f"))] 9))] + (match_operand:TF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "multi") @@ -1499,7 +867,8 @@ (unspec:HI [(compare:CCFPU (match_operand 1 "register_operand" "f") - (match_operand 2 "register_operand" "f"))] 9))] + (match_operand 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && GET_MODE (operands[1]) == GET_MODE (operands[2])" @@ -1547,12 +916,12 @@ (define_insn "x86_fnstsw_1" [(set 
(match_operand:HI 0 "register_operand" "=a") - (unspec:HI [(reg 18)] 9))] + (unspec:HI [(reg 18)] UNSPEC_FNSTSW))] "TARGET_80387" "fnstsw\t%0" [(set_attr "length" "2") (set_attr "mode" "SI") - (set_attr "i387" "1") + (set_attr "unit" "i387") (set_attr "ppro_uops" "few")]) ;; FP compares, step 3 @@ -1560,7 +929,7 @@ (define_insn "x86_sahf_1" [(set (reg:CC 17) - (unspec:CC [(match_operand:HI 0 "register_operand" "a")] 10))] + (unspec:CC [(match_operand:HI 0 "register_operand" "a")] UNSPEC_SAHF))] "!TARGET_64BIT" "sahf" [(set_attr "length" "1") @@ -1591,7 +960,7 @@ && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "fcmp,sse") + [(set_attr "type" "fcmp,ssecmp") (set_attr "mode" "unknownfp") (set_attr "athlon_decode" "vector")]) @@ -1602,7 +971,7 @@ "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "sse") + [(set_attr "type" "ssecmp") (set_attr "mode" "unknownfp") (set_attr "athlon_decode" "vector")]) @@ -1627,7 +996,7 @@ && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" - [(set_attr "type" "fcmp,sse") + [(set_attr "type" "fcmp,ssecmp") (set_attr "mode" "unknownfp") (set_attr "athlon_decode" "vector")]) @@ -1638,7 +1007,7 @@ "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" - [(set_attr "type" "sse") + [(set_attr "type" "ssecmp") (set_attr "mode" "unknownfp") (set_attr "athlon_decode" "vector")]) @@ -1733,25 +1102,20 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "1")]) -; The first alternative is used only to compute proper length of instruction. 
-; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. - (define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y") - (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm") + (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) { - case TYPE_SSE: - if (get_attr_mode (insn) == TImode) + case TYPE_SSEMOV: + if (get_attr_mode (insn) == MODE_TI) return "movdqa\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; - case TYPE_MMX: - if (get_attr_mode (insn) == DImode) + case TYPE_MMXMOV: + if (get_attr_mode (insn) == MODE_DI) return "movq\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; @@ -1759,46 +1123,44 @@ return "lea{l}\t{%1, %0|%0, %1}"; default: - if (flag_pic && SYMBOLIC_CONST (operands[1])) + if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) abort(); return "mov{l}\t{%1, %0|%0, %1}"; } } [(set (attr "type") - (cond [(eq_attr "alternative" "4,5,6") - (const_string "mmx") - (eq_attr "alternative" "7,8,9") - (const_string "sse") + (cond [(eq_attr "alternative" "2,3,4") + (const_string "mmxmov") + (eq_attr "alternative" "5,6,7") + (const_string "ssemov") (and (ne (symbol_ref "flag_pic") (const_int 0)) (match_operand:SI 1 "symbolic_operand" "")) (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*") - (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")]) + (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")]) ;; Stores and loads of ax to arbitary constant address. 
;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabssi_1_rex64" - [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:SI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:SI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{l}\t{%1, %P0|%P0, %1} - mov{l}\t{%1, %a0|%a0, %1} - movabs{l}\t{%1, %a0|%a0, %1}" + mov{l}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "SI")]) (define_insn "*movabssi_2_rex64" [(set (match_operand:SI 0 "register_operand" "=a,r") (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{l}\t{%P1, %0|%0, %P1} mov{l}\t{%a1, %0|%0, %a1}" @@ -1848,14 +1210,9 @@ [(set_attr "type" "push") (set_attr "mode" "QI")]) -; The first alternative is used only to compute proper length of instruction. -; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. 
- (define_insn "*movhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=*?a,r,r,*?a,r,m") - (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) @@ -1872,59 +1229,57 @@ } } [(set (attr "type") - (cond [(and (eq_attr "alternative" "0,1") + (cond [(and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "imov") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "imov") (and (ne (symbol_ref "TARGET_MOVX") (const_int 0)) - (eq_attr "alternative" "0,1,3,4")) + (eq_attr "alternative" "0,2")) (const_string "imovx") ] (const_string "imov"))) (set (attr "mode") (cond [(eq_attr "type" "imovx") (const_string "SI") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "SI") - (and (eq_attr "alternative" "0,1") + (and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "SI") ] - (const_string "HI"))) - (set_attr "modrm" "0,*,*,0,*,*")]) + (const_string "HI")))]) ;; Stores and loads of ax to arbitary constant address. 
;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabshi_1_rex64" - [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:HI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:HI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{w}\t{%1, %P0|%P0, %1} - mov{w}\t{%1, %a0|%a0, %1} - movabs{w}\t{%1, %a0|%a0, %1}" + mov{w}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "HI")]) (define_insn "*movabshi_2_rex64" [(set (match_operand:HI 0 "register_operand" "=a,r") (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{w}\t{%P1, %0|%0, %P1} mov{w}\t{%a1, %0|%0, %a1}" @@ -2122,7 +1477,7 @@ (define_expand "movstrictqi" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) (match_operand:QI 1 "general_operand" ""))] - "! TARGET_PARTIAL_REG_STALL" + "! TARGET_PARTIAL_REG_STALL || optimize_size" { /* Don't generate memory->memory moves, go through a register. */ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) @@ -2132,7 +1487,7 @@ (define_insn "*movstrictqi_1" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) (match_operand:QI 1 "general_operand" "*qn,m"))] - "! TARGET_PARTIAL_REG_STALL + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "mov{b}\t{%1, %0|%0, %1}" [(set_attr "type" "imov") @@ -2226,24 +1581,23 @@ ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsqi_1_rex64" - [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:QI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:QI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{b}\t{%1, %P0|%P0, %1} - mov{b}\t{%1, %a0|%a0, %1} - movabs{b}\t{%1, %a0|%a0, %1}" + mov{b}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "QI")]) (define_insn "*movabsqi_2_rex64" [(set (match_operand:QI 0 "register_operand" "=a,r") (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{b}\t{%P1, %0|%0, %P1} mov{b}\t{%a1, %0|%0, %a1}" @@ -2484,7 +1838,7 @@ movq\t{%1, %0|%0, %1} movdqa\t{%1, %0|%0, %1} movq\t{%1, %0|%0, %1}" - [(set_attr "type" "*,*,mmx,mmx,sse,sse,sse") + [(set_attr "type" "*,*,mmx,mmx,ssemov,ssemov,ssemov") (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI")]) (define_split @@ -2513,19 +1867,19 @@ { switch (get_attr_type (insn)) { - case TYPE_SSE: + case TYPE_SSEMOV: if (register_operand (operands[0], DImode) && register_operand (operands[1], DImode)) return "movdqa\t{%1, %0|%0, %1}"; /* FALLTHRU */ - case TYPE_MMX: + case TYPE_MMXMOV: return "movq\t{%1, %0|%0, %1}"; case TYPE_MULTI: return "#"; case TYPE_LEA: return "lea{q}\t{%a1, %0|%0, %a1}"; default: - if (flag_pic && SYMBOLIC_CONST 
(operands[1])) + if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) abort (); if (get_attr_mode (insn) == MODE_SI) return "mov{l}\t{%k1, %k0|%k0, %k1}"; @@ -2537,9 +1891,9 @@ } [(set (attr "type") (cond [(eq_attr "alternative" "5,6") - (const_string "mmx") - (eq_attr "alternative" "7,8") - (const_string "sse") + (const_string "mmxmov") + (eq_attr "alternative" "7,8,9") + (const_string "ssemov") (eq_attr "alternative" "4") (const_string "multi") (and (ne (symbol_ref "flag_pic") (const_int 0)) @@ -2557,7 +1911,7 @@ (define_insn "*movabsdi_1_rex64" [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) (match_operand:DI 1 "nonmemory_operand" "a,er"))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{q}\t{%1, %P0|%P0, %1} mov{q}\t{%1, %a0|%a0, %1}" @@ -2571,7 +1925,7 @@ (define_insn "*movabsdi_2_rex64" [(set (match_operand:DI 0 "register_operand" "=a,r") (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{q}\t{%P1, %0|%0, %P1} mov{q}\t{%a1, %0|%0, %a1}" @@ -2711,21 +2065,21 @@ ;; %%% Kill this when call knows how to work this out. 
(define_split [(set (match_operand:SF 0 "push_operand" "") - (match_operand:SF 1 "register_operand" ""))] - "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:SF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) (set (mem:SF (reg:SI 7)) (match_dup 1))]) (define_split [(set (match_operand:SF 0 "push_operand" "") - (match_operand:SF 1 "register_operand" ""))] - "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:SF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) (set (mem:SF (reg:DI 7)) (match_dup 1))]) (define_insn "*movsf_1" [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y") - (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,H,x,xm#rf,x#rf,rm,*y,*y"))] + (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2763,7 +2117,7 @@ case 4: return "mov{l}\t{%1, %0|%0, %1}"; case 5: - if (TARGET_SSE2) + if (TARGET_SSE2 && !TARGET_ATHLON) return "pxor\t%0, %0"; else return "xorps\t%0, %0"; @@ -2787,7 +2141,7 @@ abort(); } } - [(set_attr "type" "fmov,fmov,fmov,imov,imov,sse,sse,sse,sse,mmx,mmx,mmx") + [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")]) (define_insn "*swapsf" @@ -2883,16 +2237,16 @@ ;; %%% Kill this when call knows how to work this out. 
(define_split [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "register_operand" ""))] - "!TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:DF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT && reload_completed" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) (set (mem:DF (reg:SI 7)) (match_dup 1))] "") (define_split [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "register_operand" ""))] - "TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:DF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT && reload_completed" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) (set (mem:DF (reg:DI 7)) (match_dup 1))] "") @@ -2910,7 +2264,7 @@ (define_insn "*movdf_nointeger" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m") - (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,H,Y#f,YHm#f,Y#f"))] + (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,C,Y#f,YHm#f,Y#f"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && (reload_in_progress || reload_completed @@ -2949,7 +2303,10 @@ case 4: return "#"; case 5: - return "pxor\t%0, %0"; + if (TARGET_ATHLON) + return "xorpd\t%0, %0"; + else + return "pxor\t%0, %0"; case 6: if (TARGET_PARTIAL_REG_DEPENDENCY) return "movapd\t{%1, %0|%0, %1}"; @@ -2963,12 +2320,12 @@ abort(); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse") + [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) (define_insn "*movdf_integer" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m") - (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,H,Y#rf,Ym#rf,Y#rf"))] + (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,C,Y#rf,Ym#rf,Y#rf"))] "(GET_CODE (operands[0]) != MEM || 
GET_CODE (operands[1]) != MEM) && !optimize_size && TARGET_INTEGER_DFMODE_MOVES && (reload_in_progress || reload_completed @@ -3008,7 +2365,10 @@ return "#"; case 5: - return "pxor\t%0, %0"; + if (TARGET_ATHLON) + return "xorpd\t%0, %0"; + else + return "pxor\t%0, %0"; case 6: if (TARGET_PARTIAL_REG_DEPENDENCY) return "movapd\t{%1, %0|%0, %1}"; @@ -3022,7 +2382,7 @@ abort(); } } - [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse") + [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) (define_split @@ -3196,28 +2556,28 @@ && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == DFmode) - && (!REG_P (operands[1]) || !ANY_FP_REGNO_P (REGNO (operands[1])))" + && !ANY_FP_REG_P (operands[1])" [(const_int 0)] "ix86_split_long_move (operands); DONE;") (define_split [(set (match_operand:XF 0 "push_operand" "") - (match_operand:XF 1 "register_operand" ""))] - "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:XF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) (set (mem:XF (reg:SI 7)) (match_dup 1))]) (define_split [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "register_operand" ""))] - "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:TF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:TF (reg:SI 7)) (match_dup 1))]) (define_split [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "register_operand" ""))] - "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + (match_operand:TF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) (set (mem:TF (reg:DI 7)) (match_dup 1))]) @@ -3969,15 +3329,15 @@ (define_split [(set (match_operand:DF 0 "push_operand" "") - (float_extend:DF 
(match_operand:SF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) (set (mem:DF (reg:SI 7)) (float_extend:DF (match_dup 1)))]) (define_split [(set (match_operand:DF 0 "push_operand" "") - (float_extend:DF (match_operand:SF 1 "register_operand" "")))] - "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) (set (mem:DF (reg:DI 7)) (float_extend:DF (match_dup 1)))]) @@ -3989,8 +3349,8 @@ (define_split [(set (match_operand:XF 0 "push_operand" "") - (float_extend:XF (match_operand:SF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) (set (mem:XF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) @@ -4002,15 +3362,15 @@ (define_split [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:SF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:TF (match_operand:SF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:TF (reg:SI 7)) (float_extend:TF (match_dup 1)))]) (define_split [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:SF 1 "register_operand" "")))] - "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:TF (match_operand:SF 1 "fp_register_operand" "")))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) (set (mem:DF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) @@ -4022,8 +3382,8 @@ (define_split [(set (match_operand:XF 0 "push_operand" "") - (float_extend:XF (match_operand:DF 1 "register_operand" 
"")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) (set (mem:DF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) @@ -4035,23 +3395,27 @@ (define_split [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:DF 1 "register_operand" "")))] - "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:TF (match_operand:DF 1 "fp_register_operand" "")))] + "!TARGET_64BIT" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:TF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) (define_split [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:DF 1 "register_operand" "")))] - "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + (float_extend:TF (match_operand:DF 1 "fp_register_operand" "")))] + "TARGET_64BIT" [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) (set (mem:TF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) (define_expand "extendsfdf2" [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "")))] + (float_extend:DF (match_operand:SF 1 "general_operand" "")))] "TARGET_80387 || TARGET_SSE2" { + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. 
*/ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (SFmode, operands[1]); }) @@ -4086,7 +3450,7 @@ abort (); } } - [(set_attr "type" "fmov,fmov,sse") + [(set_attr "type" "fmov,fmov,ssecvt") (set_attr "mode" "SF,XF,DF")]) (define_insn "*extendsfdf2_1_sse_only" @@ -4095,14 +3459,18 @@ "!TARGET_80387 && TARGET_SSE2 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "cvtss2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) (define_expand "extendsfxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "")))] + (float_extend:XF (match_operand:SF 1 "general_operand" "")))] "!TARGET_64BIT && TARGET_80387" { + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (SFmode, operands[1]); }) @@ -4141,9 +3509,13 @@ (define_expand "extendsftf2" [(set (match_operand:TF 0 "nonimmediate_operand" "") - (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "")))] + (float_extend:TF (match_operand:SF 1 "general_operand" "")))] "TARGET_80387" { + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. 
*/ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (SFmode, operands[1]); }) @@ -4182,9 +3554,13 @@ (define_expand "extenddfxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "")))] + (float_extend:XF (match_operand:DF 1 "general_operand" "")))] "!TARGET_64BIT && TARGET_80387" { + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. */ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (DFmode, operands[1]); }) @@ -4223,9 +3599,13 @@ (define_expand "extenddftf2" [(set (match_operand:TF 0 "nonimmediate_operand" "") - (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "")))] + (float_extend:TF (match_operand:DF 1 "general_operand" "")))] "TARGET_80387" { + /* ??? Needed for compress_float_constant since all fp constants + are LEGITIMATE_CONSTANT_P. 
*/ + if (GET_CODE (operands[1]) == CONST_DOUBLE) + operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (DFmode, operands[1]); }) @@ -4325,7 +3705,7 @@ abort (); } } - [(set_attr "type" "fmov,multi,multi,multi,sse") + [(set_attr "type" "fmov,multi,multi,multi,ssecvt") (set_attr "mode" "SF,SF,SF,SF,DF")]) (define_insn "*truncdfsf2_2" @@ -4348,7 +3728,7 @@ abort (); } } - [(set_attr "type" "sse,fmov") + [(set_attr "type" "ssecvt,fmov") (set_attr "mode" "DF,SF")]) (define_insn "truncdfsf2_3" @@ -4371,7 +3751,7 @@ (match_operand:DF 1 "nonimmediate_operand" "mY")))] "!TARGET_80387 && TARGET_SSE2" "cvtsd2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) (define_split @@ -4396,10 +3776,9 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (float_truncate:SF - (match_operand:DF 1 "register_operand" ""))) + (match_operand:DF 1 "fp_register_operand" ""))) (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed - && FP_REG_P (operands[1])" + "TARGET_80387 && reload_completed" [(set (match_dup 2) (float_truncate:SF (match_dup 1))) (set (match_dup 0) (match_dup 2))] "") @@ -4788,14 +4167,14 @@ (fix:DI (match_operand:SF 1 "nonimmediate_operand" "xm")))] "TARGET_64BIT && TARGET_SSE" "cvttss2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt")]) (define_insn "fix_truncdfdi_sse" [(set (match_operand:DI 0 "register_operand" "=r") (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] "TARGET_64BIT && TARGET_SSE2" "cvttsd2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt")]) ;; Signed conversion to SImode. 
@@ -4896,14 +4275,14 @@ (fix:SI (match_operand:SF 1 "nonimmediate_operand" "xm")))] "TARGET_SSE" "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt")]) (define_insn "fix_truncdfsi_sse" [(set (match_operand:SI 0 "register_operand" "=r") (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] "TARGET_SSE2" "cvttsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt")]) (define_split [(set (match_operand:SI 0 "register_operand" "") @@ -5034,22 +4413,22 @@ ;; %% Not used yet. (define_insn "x86_fnstcw_1" [(set (match_operand:HI 0 "memory_operand" "=m") - (unspec:HI [(reg:HI 18)] 11))] + (unspec:HI [(reg:HI 18)] UNSPEC_FSTCW))] "TARGET_80387" "fnstcw\t%0" [(set_attr "length" "2") (set_attr "mode" "HI") - (set_attr "i387" "1") + (set_attr "unit" "i387") (set_attr "ppro_uops" "few")]) (define_insn "x86_fldcw_1" [(set (reg:HI 18) - (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] 12))] + (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))] "TARGET_80387" "fldcw\t%0" [(set_attr "length" "2") (set_attr "mode" "HI") - (set_attr "i387" "1") + (set_attr "unit" "i387") (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) @@ -5083,7 +4462,7 @@ fild%z1\t%1 # cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") + [(set_attr "type" "fmov,multi,ssecvt") (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) @@ -5092,7 +4471,7 @@ (float:SF (match_operand:SI 1 "nonimmediate_operand" "mr")))] "TARGET_SSE" "cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) @@ -5121,7 +4500,7 @@ fild%z1\t%1 # cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") + [(set_attr "type" "fmov,multi,ssecvt") (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) @@ -5130,7 +4509,7 @@ (float:SF (match_operand:DI 1 "nonimmediate_operand" "mr")))] "TARGET_64BIT && TARGET_SSE" "cvtsi2ss{q}\t{%1, 
%0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) @@ -5159,7 +4538,7 @@ fild%z1\t%1 # cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") + [(set_attr "type" "fmov,multi,ssecvt") (set_attr "mode" "DF") (set_attr "fp_int_src" "true")]) @@ -5168,7 +4547,7 @@ (float:DF (match_operand:SI 1 "nonimmediate_operand" "mr")))] "TARGET_SSE2" "cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "DF") (set_attr "fp_int_src" "true")]) @@ -5197,7 +4576,7 @@ fild%z1\t%1 # cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,sse") + [(set_attr "type" "fmov,multi,ssecvt") (set_attr "mode" "DF") (set_attr "fp_int_src" "true")]) @@ -5206,7 +4585,7 @@ (float:DF (match_operand:DI 1 "nonimmediate_operand" "mr")))] "TARGET_SSE2" "cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecvt") (set_attr "mode" "DF") (set_attr "fp_int_src" "true")]) @@ -5278,10 +4657,9 @@ ;; %%% Kill these when reload knows how to do it. 
(define_split - [(set (match_operand 0 "register_operand" "") + [(set (match_operand 0 "fp_register_operand" "") (float (match_operand 1 "register_operand" "")))] - "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0])) - && FP_REG_P (operands[0])" + "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))" [(const_int 0)] { operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); @@ -5320,7 +4698,8 @@ (match_operand:DI 2 "general_operand" ""))) (clobber (reg:CC 17))] "!TARGET_64BIT && reload_completed" - [(parallel [(set (reg:CC 17) (unspec:CC [(match_dup 1) (match_dup 2)] 12)) + [(parallel [(set (reg:CC 17) (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) (parallel [(set (match_dup 3) (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) @@ -5345,8 +4724,10 @@ (set_attr "ppro_uops" "few")]) (define_insn "*adddi3_cc_rex64" - [(set (reg:CC 17) (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0") - (match_operand:DI 2 "x86_64_general_operand" "re,rm")] 12)) + [(set (reg:CC 17) + (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")] + UNSPEC_ADD_CARRY)) (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") (plus:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" @@ -5382,8 +4763,10 @@ (set_attr "ppro_uops" "few")]) (define_insn "*addsi3_cc" - [(set (reg:CC 17) (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "ri,rm")] 12)) + [(set (reg:CC 17) + (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,rm")] + UNSPEC_ADD_CARRY)) (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (plus:SI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, SImode, operands)" @@ -5392,8 +4775,10 @@ (set_attr "mode" "SI")]) (define_insn "addqi3_cc" - 
[(set (reg:CC 17) (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qi,qm")] 12)) + [(set (reg:CC 17) + (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qi,qm")] + UNSPEC_ADD_CARRY)) (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (plus:QI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, QImode, operands)" @@ -5446,7 +4831,7 @@ (define_insn_and_split "*lea_general_1" [(set (match_operand 0 "register_operand" "=r") - (plus (plus (match_operand 1 "register_operand" "r") + (plus (plus (match_operand 1 "index_register_operand" "r") (match_operand 2 "register_operand" "r")) (match_operand 3 "immediate_operand" "i")))] "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode @@ -5478,7 +4863,7 @@ (define_insn_and_split "*lea_general_1_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "r") (match_operand:SI 2 "register_operand" "r")) (match_operand:SI 3 "immediate_operand" "i"))))] "TARGET_64BIT" @@ -5498,7 +4883,7 @@ (define_insn_and_split "*lea_general_2" [(set (match_operand 0 "register_operand" "=r") - (plus (mult (match_operand 1 "register_operand" "r") + (plus (mult (match_operand 1 "index_register_operand" "r") (match_operand 2 "const248_operand" "i")) (match_operand 3 "nonmemory_operand" "ri")))] "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode @@ -5528,7 +4913,7 @@ (define_insn_and_split "*lea_general_2_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "r") (match_operand:SI 2 "const248_operand" "n")) (match_operand:SI 3 "nonmemory_operand" "ri"))))] "TARGET_64BIT" @@ -5547,7 +4932,7 @@ 
(define_insn_and_split "*lea_general_3" [(set (match_operand 0 "register_operand" "=r") - (plus (plus (mult (match_operand 1 "register_operand" "r") + (plus (plus (mult (match_operand 1 "index_register_operand" "r") (match_operand 2 "const248_operand" "i")) (match_operand 3 "register_operand" "r")) (match_operand 4 "immediate_operand" "i")))] @@ -5581,7 +4966,7 @@ (define_insn_and_split "*lea_general_3_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "r") (match_operand:SI 2 "const248_operand" "n")) (match_operand:SI 3 "register_operand" "r")) (match_operand:SI 4 "immediate_operand" "i"))))] @@ -6318,9 +5703,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6359,9 +5742,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6401,9 +5782,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6440,9 +5819,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6478,9 +5855,7 @@ switch (get_attr_type (insn)) { case TYPE_INCDEC: - if (operands[2] == constm1_rtx - || 
(GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + if (operands[2] == constm1_rtx) return "inc{w}\t%0"; else if (operands[2] == const1_rtx) return "dec{w}\t%0"; @@ -6522,9 +5897,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return "inc{w}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) + else if (operands[2] == constm1_rtx) return "dec{w}\t%0"; abort(); @@ -6573,9 +5946,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) + else if (operands[2] == constm1_rtx) return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; abort(); @@ -6621,9 +5992,7 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; - else if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) + else if (operands[2] == constm1_rtx) return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; abort(); @@ -6653,6 +6022,40 @@ (const_string "alu"))) (set_attr "mode" "QI,QI,SI")]) +(define_insn "*addqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (plus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qnm"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[1] == const1_rtx) + return "inc{b}\t%0"; + else if (operands[1] == constm1_rtx) + return "dec{b}\t%0"; + abort(); + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. 
*/ + if (GET_CODE (operands[1]) == CONST_INT + && INTVAL (operands[1]) < 0) + { + operands[1] = GEN_INT (-INTVAL (operands[1])); + return "sub{b}\t{%1, %0|%0, %1}"; + } + return "add{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 1 "incdec_operand" "") + (const_string "incdec") + (const_string "alu1"))) + (set_attr "mode" "QI")]) + (define_insn "*addqi_2" [(set (reg 17) (compare @@ -7184,6 +6587,17 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "*subqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (minus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qmn"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "sub{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + (define_insn "*subqi_2" [(set (reg 17) (compare @@ -7822,7 +7236,7 @@ (use (match_dup 3)) (clobber (reg:CC 17))])] { - /* Avoid use of cltd in favour of a mov+shift. */ + /* Avoid use of cltd in favor of a mov+shift. */ if (!TARGET_USE_CLTD && !optimize_size) { if (true_regnum (operands[1])) @@ -7907,7 +7321,7 @@ (use (match_dup 3)) (clobber (reg:CC 17))])] { - /* Avoid use of cltd in favour of a mov+shift. */ + /* Avoid use of cltd in favor of a mov+shift. 
*/ if (!TARGET_USE_CLTD && !optimize_size) { if (true_regnum (operands[1])) @@ -8180,8 +7594,7 @@ (const_int 8)) (match_operand 1 "const_int_operand" "n")) (const_int 0)))] - "(unsigned HOST_WIDE_INT) INTVAL (operands[1]) <= 0xff - && ix86_match_ccmode (insn, CCNOmode)" + "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t{%1, %h0|%h0, %1}" [(set_attr "type" "test") (set_attr "mode" "QI") @@ -8319,10 +7732,57 @@ mask = ((HOST_WIDE_INT)1 << (pos + len)) - 1; mask &= ~(((HOST_WIDE_INT)1 << pos) - 1); - operands[3] = gen_rtx_AND (mode, operands[0], - GEN_INT (trunc_int_for_mode (mask, mode))); + operands[3] = gen_rtx_AND (mode, operands[0], gen_int_mode (mask, mode)); }) +;; Convert HImode/SImode test instructions with immediate to QImode ones. +;; i386 does not allow encoding test with an 8-bit sign-extended immediate, so +;; this is a relatively important trick. +;; Do the conversion only post-reload to avoid limiting the register class +;; to QI regs. +(define_split + [(set (reg 17) + (compare + (and (match_operand 0 "register_operand" "") + (match_operand 1 "const_int_operand" "")) + (const_int 0)))] + "reload_completed + && QI_REG_P (operands[0]) + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[1]) & ~(255 << 8))) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[1]) & ~(127 << 8)))) + && GET_MODE (operands[0]) != QImode" + [(set (reg:CCNO 17) + (compare:CCNO + (and:SI (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (match_dup 1)) + (const_int 0)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_int_mode (INTVAL (operands[1]) >> 8, SImode);") + +(define_split + [(set (reg 17) + (compare + (and (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "const_int_operand" "")) + (const_int 0)))] + "reload_completed + && (!REG_P (operands[0]) || ANY_QI_REG_P (operands[0])) + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[1]) & ~255)) + || (ix86_match_ccmode (insn, CCNOmode) + && 
!(INTVAL (operands[1]) & ~127))) + && GET_MODE (operands[0]) != QImode" + [(set (reg:CCNO 17) + (compare:CCNO + (and:QI (match_dup 0) + (match_dup 1)) + (const_int 0)))] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]);") + + ;; %%% This used to optimize known byte-wide and operations to memory, ;; and sometimes to QImode registers. If this is considered useful, ;; it should be done with splitters. @@ -8445,7 +7905,7 @@ (and (match_dup 0) (const_int -65536))) (clobber (reg:CC 17))] - "optimize_size" + "optimize_size || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)" [(set (strict_low_part (match_dup 1)) (const_int 0))] "operands[1] = gen_lowpart (HImode, operands[0]);") @@ -8591,7 +8051,8 @@ (and:QI (match_dup 0) (match_operand:QI 1 "general_operand" "qi,qmi"))) (clobber (reg:CC 17))] - "" + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "and{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) @@ -8627,7 +8088,9 @@ (const_int 0))) (set (strict_low_part (match_dup 0)) (and:QI (match_dup 0) (match_dup 1)))] - "ix86_match_ccmode (insn, CCNOmode)" + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "and{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) @@ -8647,7 +8110,7 @@ (const_int 8)) (match_operand 2 "const_int_operand" "n"))) (clobber (reg:CC 17))] - "(unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff" + "" "and{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") (set_attr "length_immediate" "1") @@ -8675,8 +8138,7 @@ (const_int 8) (const_int 8)) (match_dup 2)))] - "ix86_match_ccmode (insn, CCNOmode) - && (unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff" + "ix86_match_ccmode (insn, CCNOmode)" "and{b}\t{%2, %h0|%h0, %2}" [(set_attr "type" "alu") (set_attr "length_immediate" "1") @@ -8737,6 +8199,51 @@ [(set_attr "type" "alu") (set_attr "length_immediate" "0") (set_attr "mode" "QI")]) + +;; Convert wide AND instructions with immediate operand to shorter QImode +;; equivalents when possible. +;; Don't do the splitting with memory operands, since it introduces the risk +;; of memory mismatch stalls. We may want to do the splitting for optimizing +;; for size, but that can (should?) be handled by generic code instead. 
+(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(~INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (and:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since AND can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is not set. +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(~INTVAL (operands[2]) & ~255) + && !(INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (and:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") ;; Logical inclusive OR instructions @@ -8953,7 +8460,8 @@ (ior:QI (match_dup 0) (match_operand:QI 1 "general_operand" "qmi,qi"))) (clobber (reg:CC 17))] - "" + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "or{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) @@ -8978,7 +8486,9 @@ (const_int 0))) (set (strict_low_part (match_dup 0)) (ior:QI (match_dup 0) (match_dup 1)))] - "ix86_match_ccmode (insn, CCNOmode)" + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "or{b}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "QI")]) @@ -8995,6 +8505,118 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "iorqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC 17))] + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC 17))] + "!TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC 17))] 
+ "TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC 17))] + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" + "or{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (ior (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (ior:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since OR can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is set. 
+(define_split + [(set (match_operand 0 "register_operand" "") + (ior (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (ior:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") ;; Logical XOR instructions @@ -9210,11 +8832,77 @@ [(set_attr "type" "alu") (set_attr "mode" "QI,QI,SI")]) +(define_insn "*xorqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (xor:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qi,qmi"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "xorqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC 17))] + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + (define_insn "*xorqi_ext_1" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") (const_int 8) (const_int 8)) (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC 17))] + 
"!TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC 17))] + "TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_size)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI (zero_extract:SI (match_operand 1 "ext_register_operand" "0") (const_int 8) (const_int 8)) @@ -9222,7 +8910,7 @@ (const_int 8) (const_int 8)))) (clobber (reg:CC 17))] - "" + "(!TARGET_PARTIAL_REG_STALL || optimize_size)" "xor{b}\t{%h2, %h0|%h0, %h2}" [(set_attr "type" "alu") (set_attr "length_immediate" "0") @@ -9242,6 +8930,20 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "*xorqi_2_slp" + [(set (reg 17) + (compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "general_operand" "qim,qi")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 0) (match_dup 1)))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + (define_insn "*xorqi_cc_2" [(set (reg 17) (compare @@ -9316,6 +9018,46 @@ (match_dup 2)))])] "" "") + +(define_split + [(set (match_operand 0 "register_operand" "") + (xor (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (xor:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since XOR can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is set. 
+(define_split + [(set (match_operand 0 "register_operand" "") + (xor (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") ;; Negation instructions @@ -9519,8 +9261,7 @@ operands[0] = force_reg (SFmode, operands[0]); emit_move_insn (reg, gen_lowpart (SFmode, - GEN_INT (trunc_int_for_mode (0x80000000, - SImode)))); + gen_int_mode (0x80000000, SImode))); emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9599,22 +9340,22 @@ "#") (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "fp_register_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (neg:SF (match_dup 1)))] "") (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "register_and_not_fp_reg_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (trunc_int_for_mode (0x80000000, SImode)); + "operands[1] = gen_int_mode (0x80000000, SImode); operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") 
(define_split @@ -9631,7 +9372,7 @@ if (size >= 12) size = 10; operands[0] = adjust_address (operands[0], QImode, size - 1); - operands[1] = GEN_INT (trunc_int_for_mode (0x80, QImode)); + operands[1] = gen_int_mode (0x80, QImode); }) (define_expand "negdf2" @@ -9651,8 +9392,7 @@ in register. */ rtx reg = gen_reg_rtx (DFmode); #if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63, - DImode)); + rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); #else rtx imm = immed_double_const (0, 0x80000000, DImode); #endif @@ -9688,9 +9428,9 @@ "#") (define_insn "*negdf2_ifs_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,fm#Yr,r#Yf") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) - (use (match_operand:DF 2 "general_operand" "Y,0,*g#Yr,*rm")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y") + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0"))) + (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9775,23 +9515,22 @@ "#") (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "fp_register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (neg:DF (match_dup 1)))] "") (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "register_and_not_fp_reg_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 && reload_completed - && !FP_REGNO_P (REGNO (operands[0]))" + "!TARGET_64BIT && TARGET_80387 && reload_completed" [(parallel [(set (match_dup 3) (xor:SI (match_dup 3) (match_dup 4))) (clobber (reg:CC 17))])] - "operands[4] = GEN_INT (trunc_int_for_mode 
(0x80000000, SImode)); + "operands[4] = gen_int_mode (0x80000000, SImode); split_di (operands+0, 1, operands+2, operands+3);") (define_expand "negxf2" @@ -9820,19 +9559,19 @@ "#") (define_split - [(set (match_operand:XF 0 "register_operand" "") + [(set (match_operand:XF 0 "fp_register_operand" "") (neg:XF (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (neg:XF (match_dup 1)))] "") (define_split - [(set (match_operand:XF 0 "register_operand" "") + [(set (match_operand:XF 0 "register_and_not_fp_reg_operand" "") (neg:XF (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = GEN_INT (0x8000); @@ -9850,19 +9589,19 @@ "#") (define_split - [(set (match_operand:TF 0 "register_operand" "") + [(set (match_operand:TF 0 "fp_register_operand" "") (neg:TF (match_operand:TF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (neg:TF (match_dup 1)))] "") (define_split - [(set (match_operand:TF 0 "register_operand" "") + [(set (match_operand:TF 0 "register_and_not_fp_reg_operand" "") (neg:TF (match_operand:TF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = GEN_INT (0x8000); @@ -9982,8 +9721,7 @@ operands[0] = force_reg (SFmode, operands[0]); emit_move_insn (reg, gen_lowpart (SFmode, - GEN_INT (trunc_int_for_mode (0x80000000, - SImode)))); + gen_int_mode 
(0x80000000, SImode))); emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -10051,22 +9789,22 @@ "#") (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "fp_register_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387" [(set (match_dup 0) (abs:SF (match_dup 1)))] "") (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "register_and_not_fp_reg_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (trunc_int_for_mode (~0x80000000, SImode)); + "operands[1] = gen_int_mode (~0x80000000, SImode); operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") (define_split @@ -10083,7 +9821,7 @@ if (size >= 12) size = 10; operands[0] = adjust_address (operands[0], QImode, size - 1); - operands[1] = GEN_INT (trunc_int_for_mode (~0x80, QImode)); + operands[1] = gen_int_mode (~0x80, QImode); }) (define_expand "absdf2" @@ -10103,8 +9841,7 @@ in register. 
*/ rtx reg = gen_reg_rtx (DFmode); #if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63, - DImode)); + rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); #else rtx imm = immed_double_const (0, 0x80000000, DImode); #endif @@ -10205,23 +9942,22 @@ "#") (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "fp_register_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (abs:DF (match_dup 1)))] "") (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "register_and_not_fp_reg_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 && reload_completed && - !FP_REGNO_P (REGNO (operands[0]))" + "!TARGET_64BIT && TARGET_80387 && reload_completed" [(parallel [(set (match_dup 3) (and:SI (match_dup 3) (match_dup 4))) (clobber (reg:CC 17))])] - "operands[4] = GEN_INT (trunc_int_for_mode (~0x80000000, SImode)); + "operands[4] = gen_int_mode (~0x80000000, SImode); split_di (operands+0, 1, operands+2, operands+3);") (define_expand "absxf2" @@ -10250,19 +9986,19 @@ "#") (define_split - [(set (match_operand:XF 0 "register_operand" "") + [(set (match_operand:XF 0 "fp_register_operand" "") (abs:XF (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (abs:XF (match_dup 1)))] "") (define_split - [(set (match_operand:XF 0 "register_operand" "") + [(set (match_operand:XF 0 "register_and_not_fp_reg_operand" "") (abs:XF (match_operand:XF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && 
reload_completed" [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = GEN_INT (~0x8000); @@ -10277,19 +10013,19 @@ "#") (define_split - [(set (match_operand:TF 0 "register_operand" "") + [(set (match_operand:TF 0 "fp_register_operand" "") (abs:TF (match_operand:TF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (abs:TF (match_dup 1)))] "") (define_split - [(set (match_operand:TF 0 "register_operand" "") + [(set (match_operand:TF 0 "register_and_not_any_fp_reg_operand" "") (abs:TF (match_operand:TF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + "TARGET_80387 && reload_completed" [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = GEN_INT (~0x8000); @@ -10643,7 +10379,7 @@ return "sal{q}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{q}\t%0"; else return "sal{q}\t{%2, %0|%0, %2}"; @@ -10672,8 +10408,7 @@ [(set (match_dup 0) (mult:DI (match_dup 1) (match_dup 2)))] - "operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), - DImode));") + "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);") ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. 
We assume that shifts by constant @@ -10701,7 +10436,7 @@ return "sal{q}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{q}\t%0"; else return "sal{q}\t{%2, %0|%0, %2}"; @@ -10849,7 +10584,7 @@ return "sal{l}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{l}\t%0"; else return "sal{l}\t{%2, %0|%0, %2}"; @@ -10870,7 +10605,7 @@ ;; Convert lea to the lea pattern to avoid flags dependency. (define_split [(set (match_operand 0 "register_operand" "") - (ashift (match_operand 1 "register_operand" "") + (ashift (match_operand 1 "index_register_operand" "") (match_operand:QI 2 "const_int_operand" ""))) (clobber (reg:CC 17))] "reload_completed @@ -10880,8 +10615,7 @@ rtx pat; operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (Pmode, operands[1]); - operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), - Pmode)); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); pat = gen_rtx_MULT (Pmode, operands[1], operands[2]); if (Pmode != SImode) pat = gen_rtx_SUBREG (SImode, pat, 0); @@ -10889,6 +10623,26 @@ DONE; }) +;; Rare case of shifting RSP is handled by generating move and shift +(define_split + [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + rtx pat, clob; + emit_move_insn (operands[1], operands[0]); + pat = gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_ASHIFT (GET_MODE (operands[0]), + operands[0], operands[2])); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, 
gen_rtvec (2, pat, clob))); + DONE; +}) + (define_insn "*ashlsi3_1_zext" [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,r") @@ -10911,7 +10665,7 @@ return "sal{l}\t{%b2, %k0|%k0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{l}\t%k0"; else return "sal{l}\t{%2, %k0|%k0, %2}"; @@ -10939,8 +10693,7 @@ [(set (match_dup 0) (zero_extend:DI (subreg:SI (mult:SI (match_dup 1) (match_dup 2)) 0)))] { operands[1] = gen_lowpart (Pmode, operands[1]); - operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), - Pmode)); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); }) ;; This pattern can't accept a variable shift count, since shifts by @@ -10969,7 +10722,7 @@ return "sal{l}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{l}\t%0"; else return "sal{l}\t{%2, %0|%0, %2}"; @@ -11008,7 +10761,7 @@ return "sal{l}\t{%b2, %k0|%k0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{l}\t%k0"; else return "sal{l}\t{%2, %k0|%k0, %2}"; @@ -11053,7 +10806,7 @@ return "sal{w}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{w}\t%0"; else return "sal{w}\t{%2, %0|%0, %2}"; @@ -11091,7 +10844,7 @@ return "sal{w}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{w}\t%0"; else return "sal{w}\t{%2, %0|%0, %2}"; @@ -11133,7 +10886,7 
@@ return "sal{w}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{w}\t%0"; else return "sal{w}\t{%2, %0|%0, %2}"; @@ -11189,7 +10942,7 @@ } else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%0"; @@ -11245,7 +10998,7 @@ } else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%0"; @@ -11297,7 +11050,7 @@ return "sal{b}\t{%b2, %0|%0, %b2}"; else if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 1 - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + && (TARGET_SHIFT1 || optimize_size)) return "sal{b}\t%0"; else return "sal{b}\t{%2, %0|%0, %2}"; @@ -11353,7 +11106,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{q}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11385,7 +11138,7 @@ (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" "sar{q}\t%0" [(set_attr "type" "ishift") @@ -11542,7 +11295,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{l}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11556,7 +11309,7 
@@ (match_operand:QI 2 "const_int_1_operand" "")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{l}\t%k0" [(set_attr "type" "ishift") (set_attr "length" "2")]) @@ -11597,7 +11350,7 @@ (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%0" [(set_attr "type" "ishift") @@ -11615,7 +11368,7 @@ (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "sar{l}\t%k0" [(set_attr "type" "ishift") @@ -11666,7 +11419,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{w}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11698,7 +11451,7 @@ (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" "sar{w}\t%0" [(set_attr "type" "ishift") @@ -11738,7 +11491,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "sar{b}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11746,6 +11499,21 @@ (const_string "2") (const_string "*")))]) 
+(define_insn "*ashrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) + && (! TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "sar{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + (define_insn "*ashrqi3_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") @@ -11758,6 +11526,19 @@ [(set_attr "type" "ishift") (set_attr "mode" "QI")]) +(define_insn "*ashrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + sar{b}\t{%1, %0|%0, %1} + sar{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
@@ -11770,7 +11551,7 @@ (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" "sar{b}\t%0" [(set_attr "type" "ishift") @@ -11822,7 +11603,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{q}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11854,7 +11635,7 @@ (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{q}\t%0" [(set_attr "type" "ishift") @@ -11932,7 +11713,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{l}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -11946,7 +11727,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{l}\t%k0" [(set_attr "type" "ishift") (set_attr "length" "2")]) @@ -11988,7 +11769,7 @@ (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%0" [(set_attr "type" "ishift") @@ -12006,7 +11787,7 @@ (set (match_operand:DI 
0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{l}\t%k0" [(set_attr "type" "ishift") @@ -12057,7 +11838,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{w}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12089,7 +11870,7 @@ (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" "shr{w}\t%0" [(set_attr "type" "ishift") @@ -12129,7 +11910,7 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (LSHIFTRT, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "shr{b}\t%0" [(set_attr "type" "ishift") (set (attr "length") @@ -12137,6 +11918,20 @@ (const_string "2") (const_string "*")))]) +(define_insn "*lshrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "shr{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + (define_insn "*lshrqi3_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") @@ -12149,6 +11944,19 @@ [(set_attr "type" "ishift") (set_attr "mode" "QI")]) +(define_insn "*lshrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + shr{b}\t{%1, %0|%0, %1} + shr{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + ;; This pattern can't accept a variable shift count, since shifts by ;; zero don't affect the flags. We assume that shifts by constant ;; zero are optimized away. 
@@ -12161,7 +11969,7 @@ (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && (TARGET_SHIFT1 || optimize_size) && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" "shr{b}\t%0" [(set_attr "type" "ishift") @@ -12203,9 +12011,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{q}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") @@ -12220,7 +12028,7 @@ "@ rol{q}\t{%2, %0|%0, %2} rol{q}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "DI")]) (define_expand "rotlsi3" @@ -12237,9 +12045,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{l}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") @@ -12252,9 +12060,9 @@ (match_operand:QI 2 "const_int_1_operand" "")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{l}\t%k0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "length" "2")]) (define_insn "*rotlsi3_1" @@ -12266,7 +12074,7 @@ "@ rol{l}\t{%2, %0|%0, %2} rol{l}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_insn "*rotlsi3_1_zext" @@ -12279,7 +12087,7 @@ "@ rol{l}\t{%2, %k0|%k0, %2} rol{l}\t{%b2, %k0|%k0, %b2}" - 
[(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_expand "rotlhi3" @@ -12296,9 +12104,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATE, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{w}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand 0 "register_operand" "") (const_string "2") @@ -12313,7 +12121,7 @@ "@ rol{w}\t{%2, %0|%0, %2} rol{w}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "HI")]) (define_expand "rotlqi3" @@ -12324,20 +12132,47 @@ "TARGET_QIMODE_MATH" "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;") +(define_insn "*rotlqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "rol{b}\t%0" + [(set_attr "type" "rotate1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + (define_insn "*rotlqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0") (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATE, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "rol{b}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand 0 "register_operand" "") (const_string "2") (const_string "*")))]) +(define_insn "*rotlqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + rol{b}\t{%1, %0|%0, %1} + rol{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "rotate1") + (set_attr "mode" "QI")]) + (define_insn "*rotlqi3_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") @@ -12347,7 +12182,7 @@ "@ rol{b}\t{%2, %0|%0, %2} rol{b}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "QI")]) (define_expand "rotrdi3" @@ -12364,9 +12199,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{q}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:DI 0 "register_operand" "") (const_string "2") @@ -12381,7 +12216,7 @@ "@ ror{q}\t{%2, %0|%0, %2} ror{q}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "DI")]) (define_expand "rotrsi3" @@ -12398,9 +12233,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{l}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") @@ -12413,9 +12248,9 @@ (match_operand:QI 2 "const_int_1_operand" "")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{l}\t%k0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand:SI 0 "register_operand" "") (const_string "2") @@ 
-12430,7 +12265,7 @@ "@ ror{l}\t{%2, %0|%0, %2} ror{l}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_insn "*rotrsi3_1_zext" @@ -12443,7 +12278,7 @@ "@ ror{l}\t{%2, %k0|%k0, %2} ror{l}\t{%b2, %k0|%k0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "SI")]) (define_expand "rotrhi3" @@ -12460,9 +12295,9 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATERT, HImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{w}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set (attr "length") (if_then_else (match_operand 0 "register_operand" "") (const_string "2") @@ -12477,7 +12312,7 @@ "@ ror{w}\t{%2, %0|%0, %2} ror{w}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") (set_attr "mode" "HI")]) (define_expand "rotrqi3" @@ -12494,9 +12329,23 @@ (match_operand:QI 2 "const_int_1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATERT, QImode, operands) - && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + && (TARGET_SHIFT1 || optimize_size)" "ror{b}\t%0" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_size) + && (TARGET_SHIFT1 || optimize_size)" + "ror{b}\t%0" + [(set_attr "type" "rotate1") (set (attr "length") (if_then_else (match_operand 0 "register_operand" "") (const_string "2") @@ -12511,7 +12360,20 @@ "@ ror{b}\t{%2, %0|%0, %2} ror{b}\t{%b2, %0|%0, %b2}" - [(set_attr "type" "ishift") + [(set_attr "type" "rotate") + (set_attr "mode" "QI")]) + +(define_insn "*rotrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + ror{b}\t{%1, %0|%0, %1} + ror{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "rotate1") (set_attr "mode" "QI")]) ;; Bit set / bit test instructions @@ -12789,7 +12651,7 @@ (match_operand:SF 3 "nonimmediate_operand" "xm")]))] "TARGET_SSE && reload_completed" "cmp%D1ss\t{%3, %0|%0, %3}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecmp") (set_attr "mode" "SF")]) (define_insn "*sse_setccdf" @@ -12799,7 +12661,7 @@ (match_operand:DF 3 "nonimmediate_operand" "Ym")]))] "TARGET_SSE2 && reload_completed" "cmp%D1sd\t{%3, %0|%0, %3}" - [(set_attr "type" "sse") + [(set_attr "type" "ssecmp") (set_attr "mode" "DF")]) ;; Basic conditional jump instructions. 
@@ -12961,13 +12823,14 @@ "" "%+j%C1\t%l0" [(set_attr "type" "ibr") - (set (attr "prefix_0f") + (set_attr "modrm" "0") + (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) - (const_int 0) - (const_int 1)))]) + (const_int 128))) + (const_int 2) + (const_int 6)))]) (define_insn "*jcc_2" [(set (pc) @@ -12978,13 +12841,14 @@ "" "%+j%c1\t%l0" [(set_attr "type" "ibr") - (set (attr "prefix_0f") + (set_attr "modrm" "0") + (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) - (const_int 0) - (const_int 1)))]) + (const_int 128))) + (const_int 2) + (const_int 6)))]) ;; In general it is not safe to assume too much about CCmode registers, ;; so simplify-rtx stops when it sees a second one. Under certain @@ -13244,7 +13108,15 @@ (label_ref (match_operand 0 "" "")))] "" "jmp\t%l0" - [(set_attr "type" "ibr")]) + [(set_attr "type" "ibr") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 5))) + (set_attr "modrm" "0")]) (define_expand "indirect_jump" [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))] @@ -13270,29 +13142,34 @@ (use (label_ref (match_operand 1 "" "")))])] "" { - /* In PIC mode, the table entries are stored GOT-relative. Convert - the relative address to an absolute address. */ + /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit) + relative. Convert the relative address to an absolute address. 
*/ if (flag_pic) { + rtx op0, op1; + enum rtx_code code; + if (TARGET_64BIT) - operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], - gen_rtx_LABEL_REF (Pmode, operands[1]), - NULL_RTX, 0, - OPTAB_DIRECT); - else if (HAVE_AS_GOTOFF_IN_DATA) { - operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], - pic_offset_table_rtx, NULL_RTX, - 1, OPTAB_DIRECT); - current_function_uses_pic_offset_table = 1; + code = PLUS; + op0 = operands[0]; + op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + } + else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA) + { + code = PLUS; + op0 = operands[0]; + op1 = pic_offset_table_rtx; } else { - operands[0] = expand_simple_binop (Pmode, MINUS, pic_offset_table_rtx, - operands[0], NULL_RTX, 1, - OPTAB_DIRECT); - current_function_uses_pic_offset_table = 1; + code = MINUS; + op0 = pic_offset_table_rtx; + op1 = operands[0]; } + + operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0, + OPTAB_DIRECT); } }) @@ -13357,14 +13234,17 @@ return "dec{l}\t%1\;%+jne\t%l0"; } [(set_attr "ppro_uops" "many") - (set (attr "type") + (set (attr "length") (if_then_else (and (eq_attr "alternative" "0") (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124)))) - (const_string "ibr") - (const_string "multi")))]) + (const_int 128)))) + (const_int 2) + (const_int 16))) + ;; We don't know the type before shorten branches. Optimistically expect + ;; the loop instruction to match. + (set (attr "type") (const_string "ibr"))]) (define_split [(set (pc) @@ -13474,21 +13354,8 @@ (match_operand:SI 3 "" "")))])] "!TARGET_64BIT" { - if (operands[3] == const0_rtx) - { - emit_insn (gen_call (operands[0], operands[1], constm1_rtx)); - DONE; - } - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF - && ! 
SYMBOL_REF_FLAG (XEXP (operands[0], 0))) - current_function_uses_pic_offset_table = 1; - if (! call_insn_operand (XEXP (operands[0], 0), Pmode)) - XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); - if (TARGET_64BIT) - abort(); + ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3]); + DONE; }) (define_insn "*call_pop_0" @@ -13530,37 +13397,12 @@ [(call (match_operand:QI 0 "" "") (match_operand 1 "" "")) (use (match_operand 2 "" ""))] - ;; Operand 1 not used on the i386. "" { - rtx insn; - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[0], 0))) - current_function_uses_pic_offset_table = 1; - - if (! call_insn_operand (XEXP (operands[0], 0), Pmode)) - XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); - if (TARGET_64BIT && INTVAL (operands[2]) >= 0) - { - rtx reg = gen_rtx_REG (QImode, 0); - emit_move_insn (reg, operands[2]); - insn = emit_call_insn (gen_call_exp (operands[0], operands[1])); - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); - DONE; - } - insn = emit_call_insn (gen_call_exp (operands[0], operands[1])); - DONE; + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL); + DONE; }) -(define_expand "call_exp" - [(call (match_operand:QI 0 "" "") - (match_operand 1 "" ""))] - "" - "") - (define_insn "*call_0" [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) (match_operand 1 "" ""))] @@ -13612,7 +13454,6 @@ [(set_attr "type" "call")]) ;; Call subroutine, returning value in operand 0 -;; (which must be a hard register). 
(define_expand "call_value_pop" [(parallel [(set (match_operand 0 "" "") @@ -13623,20 +13464,9 @@ (match_operand:SI 4 "" "")))])] "!TARGET_64BIT" { - if (operands[4] == const0_rtx) - { - emit_insn (gen_call_value (operands[0], operands[1], operands[2], - constm1_rtx)); - DONE; - } - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0))) - current_function_uses_pic_offset_table = 1; - if (! call_insn_operand (XEXP (operands[1], 0), Pmode)) - XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], operands[4]); + DONE; }) (define_expand "call_value" @@ -13647,36 +13477,10 @@ ;; Operand 2 not used on the i386. "" { - rtx insn; - /* Static functions and indirect calls don't need - current_function_uses_pic_offset_table. */ - if (flag_pic - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0))) - current_function_uses_pic_offset_table = 1; - if (! call_insn_operand (XEXP (operands[1], 0), Pmode)) - XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); - if (TARGET_64BIT && INTVAL (operands[3]) >= 0) - { - rtx reg = gen_rtx_REG (QImode, 0); - emit_move_insn (reg, operands[3]); - insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1], - operands[2])); - use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); - DONE; - } - insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1], - operands[2])); + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL); DONE; }) -(define_expand "call_value_exp" - [(set (match_operand 0 "" "") - (call (match_operand:QI 1 "" "") - (match_operand:SI 2 "" "")))] - "" - "") - ;; Call subroutine returning any type. 
(define_expand "untyped_call" @@ -13693,12 +13497,10 @@ simply pretend the untyped call returns a complex long double value. */ - emit_call_insn (TARGET_FLOAT_RETURNS_IN_80387 - ? gen_call_value (gen_rtx_REG (XCmode, FIRST_FLOAT_REG), - operands[0], const0_rtx, - GEN_INT (SSE_REGPARM_MAX - 1)) - : gen_call (operands[0], const0_rtx, - GEN_INT (SSE_REGPARM_MAX - 1))); + ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387 + ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), + operands[0], const0_rtx, GEN_INT (SSE_REGPARM_MAX - 1), + NULL); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -13710,7 +13512,7 @@ registers we stored in the result block. We avoid problems by claiming that all hard registers are used and clobbered at this point. */ - emit_insn (gen_blockage ()); + emit_insn (gen_blockage (const0_rtx)); DONE; }) @@ -13721,7 +13523,7 @@ ;; all of memory. This blocks insns from being moved across this point. (define_insn "blockage" - [(unspec_volatile [(const_int 0)] 0)] + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_BLOCKAGE)] "" "" [(set_attr "length" "0")]) @@ -13781,45 +13583,14 @@ "" "ix86_expand_prologue (); DONE;") -(define_insn "prologue_set_got" +(define_insn "set_got" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI - [(plus:SI (match_dup 0) - (plus:SI (match_operand:SI 1 "symbolic_operand" "") - (minus:SI (pc) (match_operand 2 "" ""))))] 1)) + (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) (clobber (reg:CC 17))] "!TARGET_64BIT" -{ - if (GET_CODE (operands[2]) == LABEL_REF) - operands[2] = XEXP (operands[2], 0); - if (TARGET_DEEP_BRANCH_PREDICTION) - return "add{l}\t{%1, %0|%0, %1}"; - else - return "add{l}\t{%1+[.-%X2], %0|%0, %a1+(.-%X2)}"; -} - [(set_attr "type" "alu") - ; Since this insn may have two constant operands, we must set the - ; length manually. 
- (set_attr "length_immediate" "4") - (set_attr "mode" "SI")]) - -(define_insn "prologue_get_pc" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI [(plus:SI (pc) (match_operand 1 "" ""))] 2))] - "!TARGET_64BIT" -{ - if (GET_CODE (operands[1]) == LABEL_REF) - operands[1] = XEXP (operands[1], 0); - output_asm_insn ("call\t%X1", operands); - if (! TARGET_DEEP_BRANCH_PREDICTION) - { - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", - CODE_LABEL_NUMBER (operands[1])); - return "pop{l}\t%0"; - } - RET; -} - [(set_attr "type" "multi")]) + { return output_set_got (operands[0]); } + [(set_attr "type" "multi") + (set_attr "length" "12")]) (define_expand "epilogue" [(const_int 1)] @@ -13832,11 +13603,10 @@ "ix86_expand_epilogue (0); DONE;") (define_expand "eh_return" - [(use (match_operand 0 "register_operand" "")) - (use (match_operand 1 "register_operand" ""))] + [(use (match_operand 0 "register_operand" ""))] "" { - rtx tmp, sa = operands[0], ra = operands[1]; + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; /* Tricky bit: we write the address of the handler to which we will be returning into someone else's stack frame, one word below the @@ -13855,7 +13625,8 @@ }) (define_insn_and_split "eh_return_si" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")] 13)] + [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")] + UNSPECV_EH_RETURN)] "!TARGET_64BIT" "#" "reload_completed" @@ -13863,7 +13634,8 @@ "ix86_expand_epilogue (2); DONE;") (define_insn_and_split "eh_return_di" - [(unspec_volatile [(match_operand:DI 0 "register_operand" "c")] 13)] + [(unspec_volatile [(match_operand:DI 0 "register_operand" "c")] + UNSPECV_EH_RETURN)] "TARGET_64BIT" "#" "reload_completed" @@ -13879,7 +13651,6 @@ [(set_attr "length_immediate" "0") (set_attr "length" "1") (set_attr "modrm" "0") - (set_attr "modrm" "0") (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) @@ -13892,13 +13663,12 @@ [(set_attr "length_immediate" "0") 
(set_attr "length" "1") (set_attr "modrm" "0") - (set_attr "modrm" "0") (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) (define_expand "ffssi2" [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ffs:SI (match_operand:SI 1 "general_operand" "")))] + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] "" { rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode); @@ -13987,7 +13757,7 @@ (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_dup 1)] 5))] + (unspec:SI [(match_dup 1)] UNSPEC_BSF))] "" "bsf{l}\t{%1, %0|%0, %1}" [(set_attr "prefix_0f" "1") @@ -13996,6 +13766,173 @@ ;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger ;; and slower than the two-byte movzx insn needed to do the work in SImode. +;; Thread-local storage patterns for ELF. +;; +;; Note that these code sequences must appear exactly as shown +;; in order to allow linker relaxation. 
+ +(define_insn "*tls_global_dynamic_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "*tls_global_dynamic_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]} + push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop" + [(set_attr "type" "multi") + (set_attr "length" "14")]) + +(define_expand "tls_global_dynamic_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI + [(match_dup 2) + (match_operand:SI 1 "tls_symbolic_operand" "") + (match_dup 3)] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "")) + (clobber (match_scratch:SI 5 "")) + (clobber (reg:CC 17))])] + "" +{ + if (flag_pic) + operands[2] = pic_offset_table_rtx; + else + { + operands[2] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[2])); + } + operands[3] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_global_dynamic_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call (mem:QI (match_operand:DI 2 "call_insn_operand" "")) + (match_operand:DI 3 "" ""))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)] + "TARGET_64BIT" + ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|%%rdi, 
%a1@TLSGD[%%rip]}\;.word\t0x6666\;rex64\;call\t%P2" + [(set_attr "type" "multi") + (set_attr "length" "16")]) + +(define_expand "tls_global_dynamic_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call (mem:QI (match_dup 2)) (const_int 0))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)])] + "" +{ + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2" + [(set_attr "type" "multi") + (set_attr "length" "11")]) + +(define_insn "*tls_local_dynamic_base_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]} + push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3" + [(set_attr "type" "multi") + (set_attr "length" "13")]) + +(define_expand "tls_local_dynamic_base_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 2)] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:CC 17))])] + "" +{ + if (flag_pic) + operands[1] = pic_offset_table_rtx; + else + { + operands[1] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[1])); + } + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call (mem:QI (match_operand:DI 1 
"call_insn_operand" "")) + (match_operand:DI 2 "" ""))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] + "TARGET_64BIT" + "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_expand "tls_local_dynamic_base_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call (mem:QI (match_dup 1)) (const_int 0))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])] + "" +{ + operands[1] = ix86_tls_get_addr (); +}) + +;; Local dynamic of a single variable is a lose. Show combine how +;; to convert that back to global dynamic. + +(define_insn_and_split "*tls_local_dynamic_32_once" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE) + (const:SI (unspec:SI + [(match_operand:SI 3 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC 17))] + "" + "#" + "" + [(parallel [(set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)] + UNSPEC_TLS_GD)) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (reg:CC 17))])] + "") + ;; These patterns match the binary 387 instructions for addM3, subM3, ;; mulM3 and divM3. There are three patterns for each of DFmode and ;; SFmode. 
The first is the normal insn, the second the same insn but @@ -14032,7 +13969,9 @@ "* return output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (eq_attr "alternative" "1") - (const_string "sse") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) (if_then_else (match_operand:SF 3 "mult_operator" "") (const_string "fmul") (const_string "fop")))) @@ -14046,7 +13985,10 @@ "TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse") + [(set (attr "type") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) (set_attr "mode" "SF")]) (define_insn "*fop_df_comm_nosse" @@ -14075,7 +14017,9 @@ "* return output_387_binary_op (insn, operands);" [(set (attr "type") (if_then_else (eq_attr "alternative" "1") - (const_string "sse") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) (if_then_else (match_operand:SF 3 "mult_operator" "") (const_string "fmul") (const_string "fop")))) @@ -14090,7 +14034,10 @@ && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c' && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse") + [(set (attr "type") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) (set_attr "mode" "DF")]) (define_insn "*fop_xf_comm" @@ -14148,8 +14095,14 @@ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(eq_attr "alternative" "2") - (const_string "sse") + (cond [(and (eq_attr "alternative" "2") + (match_operand:SF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + 
(match_operand:SF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") (match_operand:SF 3 "mult_operator" "") (const_string "fmul") (match_operand:SF 3 "div_operator" "") @@ -14166,7 +14119,13 @@ "TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse") + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:SF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd"))) (set_attr "mode" "SF")]) ;; ??? Add SSE splitters for these! @@ -14218,7 +14177,7 @@ [(set (attr "type") (cond [(match_operand:DF 3 "mult_operator" "") (const_string "fmul") - (match_operand:DF 3 "div_operator" "") + (match_operand:DF 3 "div_operator" "") (const_string "fdiv") ] (const_string "fop"))) @@ -14235,8 +14194,14 @@ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(eq_attr "alternative" "2") - (const_string "sse") + (cond [(and (eq_attr "alternative" "2") + (match_operand:SF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:SF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") (match_operand:DF 3 "mult_operator" "") (const_string "fmul") (match_operand:DF 3 "div_operator" "") @@ -14253,7 +14218,14 @@ "TARGET_SSE2 && TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" "* return output_387_binary_op (insn, operands);" - [(set_attr "type" "sse")]) + [(set_attr "mode" "DF") + (set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:SF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd")))]) ;; ??? Add SSE splitters for these! 
(define_insn "*fop_df_2" @@ -14760,7 +14732,7 @@ (define_insn "sindf2" [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 1))] + (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_SIN))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fsin" @@ -14769,7 +14741,7 @@ (define_insn "sinsf2" [(set (match_operand:SF 0 "register_operand" "=f") - (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 1))] + (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_SIN))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fsin" @@ -14779,7 +14751,8 @@ (define_insn "*sinextendsfdf2" [(set (match_operand:DF 0 "register_operand" "=f") (unspec:DF [(float_extend:DF - (match_operand:SF 1 "register_operand" "0"))] 1))] + (match_operand:SF 1 "register_operand" "0"))] + UNSPEC_SIN))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fsin" @@ -14788,7 +14761,7 @@ (define_insn "sinxf2" [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 1))] + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))] "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fsin" @@ -14797,7 +14770,7 @@ (define_insn "sintf2" [(set (match_operand:TF 0 "register_operand" "=f") - (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 1))] + (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_SIN))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fsin" @@ -14806,7 +14779,7 @@ (define_insn "cosdf2" [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 2))] + (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_COS))] "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" @@ -14815,7 +14788,7 @@ (define_insn "cossf2" [(set (match_operand:SF 0 "register_operand" "=f") - (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 2))] + (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_COS))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" @@ -14825,7 +14798,8 @@ (define_insn "*cosextendsfdf2" [(set (match_operand:DF 0 "register_operand" "=f") (unspec:DF [(float_extend:DF - (match_operand:SF 1 "register_operand" "0"))] 2))] + (match_operand:SF 1 "register_operand" "0"))] + UNSPEC_COS))] "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" @@ -14834,8 +14808,8 @@ (define_insn "cosxf2" [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 2))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))] + "!TARGET_64BIT && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" [(set_attr "type" "fpspc") @@ -14843,7 +14817,7 @@ (define_insn "costf2" [(set (match_operand:TF 0 "register_operand" "=f") - (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 2))] + (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_COS))] "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" @@ -15737,7 +15711,7 @@ [(set (match_operand:SI 0 "register_operand" "") (unspec:SI [(match_operand:BLK 1 "general_operand" "") (match_operand:QI 2 "immediate_operand" "") - (match_operand 3 "immediate_operand" "")] 0))] + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] "" { if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) @@ -15750,7 +15724,7 @@ [(set (match_operand:DI 0 "register_operand" "") (unspec:DI [(match_operand:BLK 1 "general_operand" "") (match_operand:QI 2 "immediate_operand" "") - (match_operand 3 "immediate_operand" "")] 0))] + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] "" { if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) @@ -15764,7 +15738,7 @@ (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1")) (match_operand:QI 2 "register_operand" "a") (match_operand:SI 3 "immediate_operand" "i") - (match_operand:SI 4 "register_operand" "0")] 0)) + (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS)) (use (reg:SI 19)) (clobber (match_operand:SI 1 "register_operand" "=D")) (clobber (reg:CC 17))] @@ -15779,7 +15753,7 @@ (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1")) (match_operand:QI 2 "register_operand" "a") (match_operand:DI 3 "immediate_operand" "i") - (match_operand:DI 4 "register_operand" "0")] 0)) + (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS)) (use (reg:SI 19)) (clobber (match_operand:DI 1 "register_operand" "=D")) (clobber (reg:CC 17))] @@ -15895,6 +15869,7 @@ ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") (set_attr "mode" "DI") @@ -15937,6 +15912,7 @@ ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. 
[(set_attr "type" "alu") + (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") (set_attr "mode" "SI") @@ -16043,12 +16019,12 @@ (set_attr "mode" "DF")]) (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" [(match_operand 4 "" "") (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand" "") (match_operand:DF 3 "nonimmediate_operand" "")))] - "!TARGET_64BIT && !ANY_FP_REG_P (operands[0]) && reload_completed" + "!TARGET_64BIT && reload_completed" [(set (match_dup 2) (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) (match_dup 5) @@ -16156,13 +16132,13 @@ ;; We can't represent the LT test directly. Do this by swapping the operands. (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "fp_register_operand" "") (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "register_operand" "")) (match_operand:SF 3 "register_operand" "") (match_operand:SF 4 "register_operand" ""))) (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed + "reload_completed && ((operands_match_p (operands[1], operands[3]) && operands_match_p (operands[2], operands[4])) || (operands_match_p (operands[1], operands[4]) @@ -16238,13 +16214,13 @@ ;; We can't represent the LT test directly. Do this by swapping the operands. 
(define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "fp_register_operand" "") (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "register_operand" "")) (match_operand:DF 3 "register_operand" "") (match_operand:DF 4 "register_operand" ""))) (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed + "reload_completed && ((operands_match_p (operands[1], operands[3]) && operands_match_p (operands[2], operands[4])) || (operands_match_p (operands[1], operands[4]) @@ -16319,13 +16295,13 @@ (match_dup 2)))]) (define_split - [(set (match_operand:SF 0 "register_operand" "") + [(set (match_operand:SF 0 "fp_register_operand" "") (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "register_operand" "")) (match_operand:SF 3 "register_operand" "") (match_operand:SF 4 "register_operand" ""))) (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed + "reload_completed && ((operands_match_p (operands[1], operands[3]) && operands_match_p (operands[2], operands[4])) || (operands_match_p (operands[1], operands[4]) @@ -16400,13 +16376,13 @@ (match_dup 2)))]) (define_split - [(set (match_operand:DF 0 "register_operand" "") + [(set (match_operand:DF 0 "fp_register_operand" "") (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "register_operand" "")) (match_operand:DF 3 "register_operand" "") (match_operand:DF 4 "register_operand" ""))) (clobber (reg:CC 17))] - "FP_REG_P (operands[0]) && reload_completed + "reload_completed && ((operands_match_p (operands[1], operands[3]) && operands_match_p (operands[2], operands[4])) || (operands_match_p (operands[1], operands[4]) @@ -16628,7 +16604,7 @@ ;; cmpCC op0, op4 - set op0 to 0 or ffffffff depending on the comparison ;; and op2, op0 - zero op2 if comparison was false ;; nand op0, op3 - load op3 to op0 if comparison was false -;; or op2, op0 - get the non-zero one into the result. 
+;; or op2, op0 - get the nonzero one into the result. (define_split [(set (match_operand 0 "register_operand" "") (if_then_else (match_operator 1 "sse_comparison_operator" @@ -16744,7 +16720,7 @@ (define_split [(set (match_operand 0 "register_operand" "") (if_then_else (match_operator 1 "comparison_operator" - [(match_operand 4 "register_operand" "") + [(match_operand 4 "nonimmediate_operand" "") (match_operand 5 "nonimmediate_operand" "")]) (match_operand 2 "nonmemory_operand" "") (match_operand 3 "nonmemory_operand" "")))] @@ -16756,13 +16732,16 @@ (subreg:TI (match_dup 7) 0)))] { PUT_MODE (operands[1], GET_MODE (operands[0])); - if (!sse_comparison_operator (operands[1], VOIDmode)) + if (!sse_comparison_operator (operands[1], VOIDmode) + || !rtx_equal_p (operands[0], operands[4])) { rtx tmp = operands[5]; operands[5] = operands[4]; operands[4] = tmp; PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1]))); } + if (!rtx_equal_p (operands[0], operands[4])) + abort (); if (const0_operand (operands[2], GET_MODE (operands[0]))) { operands[7] = operands[3]; @@ -16788,7 +16767,7 @@ }) (define_insn "allocate_stack_worker_1" - [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3) + [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] UNSPEC_STACK_PROBE) (set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 0))) (clobber (match_dup 0)) (clobber (reg:CC 17))] @@ -16798,7 +16777,7 @@ (set_attr "length" "5")]) (define_insn "allocate_stack_worker_rex64" - [(unspec:DI [(match_operand:DI 0 "register_operand" "a")] 3) + [(unspec:DI [(match_operand:DI 0 "register_operand" "a")] UNSPEC_STACK_PROBE) (set (reg:DI 7) (minus:DI (reg:DI 7) (match_dup 0))) (clobber (match_dup 0)) (clobber (reg:CC 17))] @@ -16835,7 +16814,7 @@ [(label_ref (match_operand 0 "" ""))] "!TARGET_64BIT && flag_pic" { - load_pic_register (); + emit_insn (gen_set_got (pic_offset_table_rtx)); DONE; }) @@ -16849,7 +16828,8 @@ (clobber (reg:CC 17))] "! 
TARGET_PARTIAL_REG_STALL && reload_completed && ((GET_MODE (operands[0]) == HImode - && (!optimize_size || GET_CODE (operands[2]) != CONST_INT + && ((!optimize_size && !TARGET_FAST_PREFIX) + || GET_CODE (operands[2]) != CONST_INT || CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))) || (GET_MODE (operands[0]) == QImode && (TARGET_PROMOTE_QImode || optimize_size)))" @@ -16862,6 +16842,10 @@ operands[2] = gen_lowpart (SImode, operands[2]); PUT_MODE (operands[3], SImode);") +; Promote the QImode tests, as i386 has encoding of the AND +; instruction with 32-bit sign-extended immediate and thus the +; instruction size is unchanged, except in the %eax case for +; which it is increased by one byte, hence the ! optimize_size. (define_split [(set (reg 17) (compare (and (match_operand 1 "aligned_operand" "") @@ -16870,39 +16854,44 @@ (set (match_operand 0 "register_operand" "") (and (match_dup 1) (match_dup 2)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode) + && ! optimize_size + && ((GET_MODE (operands[0]) == HImode && ! 
TARGET_FAST_PREFIX) + || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))" [(parallel [(set (reg:CCNO 17) (compare:CCNO (and:SI (match_dup 1) (match_dup 2)) (const_int 0))) (set (match_dup 0) (and:SI (match_dup 1) (match_dup 2)))])] "operands[2] - = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]) - & GET_MODE_MASK (GET_MODE (operands[0])), - SImode)); + = gen_int_mode (INTVAL (operands[2]) + & GET_MODE_MASK (GET_MODE (operands[0])), + SImode); operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]);") +; Don't promote the QImode tests, as i386 doesn't have encoding of +; the TEST instruction with 32-bit sign-extended immediate and thus +; the instruction size would at least double, which is not what we +; want even with ! optimize_size. (define_split [(set (reg 17) - (compare (and (match_operand 0 "aligned_operand" "") - (match_operand 1 "const_int_operand" "")) + (compare (and (match_operand:HI 0 "aligned_operand" "") + (match_operand:HI 1 "const_int_operand" "")) (const_int 0)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - && (TARGET_PROMOTE_QImode || optimize_size)))" + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode) + && ! TARGET_FAST_PREFIX + && ! 
optimize_size" [(set (reg:CCNO 17) (compare:CCNO (and:SI (match_dup 0) (match_dup 1)) (const_int 0)))] "operands[1] - = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]) - & GET_MODE_MASK (GET_MODE (operands[0])), - SImode)); + = gen_int_mode (INTVAL (operands[1]) + & GET_MODE_MASK (GET_MODE (operands[0])), + SImode); operands[0] = gen_lowpart (SImode, operands[0]);") (define_split @@ -17152,7 +17141,8 @@ (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode) && (true_regnum (operands[0]) != 0 - || CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K')) + || (GET_CODE (operands[1]) == CONST_INT + && CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K'))) && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" [(parallel [(set (reg:CCNO 17) @@ -17375,7 +17365,7 @@ [(set (match_operand:SI 0 "register_operand" "") (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "const_int_operand" "")) 0))] - "exact_log2 (INTVAL (operands[1])) >= 0 + "exact_log2 (INTVAL (operands[2])) >= 0 && REGNO (operands[0]) == REGNO (operands[1]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) @@ -17853,52 +17843,92 @@ ;; Moves for SSE/MMX regs. (define_insn "movv4sf_internal" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. 
- "movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) (define_insn "movv4si_internal" - [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + +(define_insn "movv2di_internal" + [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE" + ;; @@@ let's try to use movaps here. + "@ + pxor\t%0, %0 + movdqa\t{%1, %0|%0, %1} + movdqa\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) (define_insn "movv8qi_internal" - [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") - (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) (define_insn "movv4hi_internal" - [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m") - (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE 
(operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) (define_insn "movv2si_internal" - [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "movv2sf_internal" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))] - "TARGET_3DNOW" - "movq\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))] + "TARGET_3DNOW + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_expand "movti" - [(set (match_operand:TI 0 "general_operand" "") - (match_operand:TI 1 "general_operand" ""))] + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] "TARGET_SSE || TARGET_64BIT" { if (TARGET_64BIT) @@ -17908,9 +17938,72 @@ DONE; }) +(define_insn "movv2df_internal" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorpd\t%0, %0 + movapd\t{%1, %0|%0, %1} + movapd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2DF")]) + +(define_insn "movv8hi_internal" + [(set 
(match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + +(define_insn "movv16qi_internal" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) + +(define_expand "movv2df" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_vector_move (V2DFmode, operands); + DONE; +}) + +(define_expand "movv8hi" + [(set (match_operand:V8HI 0 "nonimmediate_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_vector_move (V8HImode, operands); + DONE; +}) + +(define_expand "movv16qi" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "") + (match_operand:V16QI 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_vector_move (V16QImode, operands); + DONE; +}) + (define_expand "movv4sf" - [(set (match_operand:V4SF 0 "general_operand" "") - (match_operand:V4SF 1 "general_operand" ""))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (match_operand:V4SF 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V4SFmode, operands); @@ -17918,17 +18011,26 @@ }) (define_expand "movv4si" - [(set (match_operand:V4SI 0 "general_operand" "") - (match_operand:V4SI 1 "general_operand" ""))] - "TARGET_MMX" + [(set (match_operand:V4SI 0 "nonimmediate_operand" "") + (match_operand:V4SI 1 "nonimmediate_operand" ""))] + "TARGET_SSE" { ix86_expand_vector_move (V4SImode, 
operands); DONE; }) +(define_expand "movv2di" + [(set (match_operand:V2DI 0 "nonimmediate_operand" "") + (match_operand:V2DI 1 "nonimmediate_operand" ""))] + "TARGET_SSE" +{ + ix86_expand_vector_move (V2DImode, operands); + DONE; +}) + (define_expand "movv2si" - [(set (match_operand:V2SI 0 "general_operand" "") - (match_operand:V2SI 1 "general_operand" ""))] + [(set (match_operand:V2SI 0 "nonimmediate_operand" "") + (match_operand:V2SI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V2SImode, operands); @@ -17936,8 +18038,8 @@ }) (define_expand "movv4hi" - [(set (match_operand:V4HI 0 "general_operand" "") - (match_operand:V4HI 1 "general_operand" ""))] + [(set (match_operand:V4HI 0 "nonimmediate_operand" "") + (match_operand:V4HI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V4HImode, operands); @@ -17945,8 +18047,8 @@ }) (define_expand "movv8qi" - [(set (match_operand:V8QI 0 "general_operand" "") - (match_operand:V8QI 1 "general_operand" ""))] + [(set (match_operand:V8QI 0 "nonimmediate_operand" "") + (match_operand:V8QI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V8QImode, operands); @@ -17954,14 +18056,97 @@ }) (define_expand "movv2sf" - [(set (match_operand:V2SF 0 "general_operand" "") - (match_operand:V2SF 1 "general_operand" ""))] + [(set (match_operand:V2SF 0 "nonimmediate_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" ""))] "TARGET_3DNOW" { ix86_expand_vector_move (V2SFmode, operands); DONE; }) +(define_insn "*pushv2df" + [(set (match_operand:V2DF 0 "push_operand" "=<") + (match_operand:V2DF 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv2di" + [(set (match_operand:V2DI 0 "push_operand" "=<") + (match_operand:V2DI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv8hi" + [(set (match_operand:V8HI 0 "push_operand" "=<") + (match_operand:V8HI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv16qi" + 
[(set (match_operand:V16QI 0 "push_operand" "=<") + (match_operand:V16QI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv4sf" + [(set (match_operand:V4SF 0 "push_operand" "=<") + (match_operand:V4SF 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv4si" + [(set (match_operand:V4SI 0 "push_operand" "=<") + (match_operand:V4SI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv2si" + [(set (match_operand:V2SI 0 "push_operand" "=<") + (match_operand:V2SI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv4hi" + [(set (match_operand:V4HI 0 "push_operand" "=<") + (match_operand:V4HI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv8qi" + [(set (match_operand:V8QI 0 "push_operand" "=<") + (match_operand:V8QI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv2sf" + [(set (match_operand:V2SF 0 "push_operand" "=<") + (match_operand:V2SF 1 "register_operand" "y"))] + "TARGET_3DNOW" + "#") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "!TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + + (define_insn_and_split "*pushti" [(set 
(match_operand:TI 0 "push_operand" "=<") (match_operand:TI 1 "nonmemory_operand" "x"))] @@ -17971,7 +18156,51 @@ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:TI (reg:SI 7)) (match_dup 1))] "" - [(set_attr "type" "sse")]) + [(set_attr "type" "multi")]) + +(define_insn_and_split "*pushv2df" + [(set (match_operand:V2DF 0 "push_operand" "=<") + (match_operand:V2DF 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V2DF (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "multi")]) + +(define_insn_and_split "*pushv2di" + [(set (match_operand:V2DI 0 "push_operand" "=<") + (match_operand:V2DI 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V2DI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "multi")]) + +(define_insn_and_split "*pushv8hi" + [(set (match_operand:V8HI 0 "push_operand" "=<") + (match_operand:V8HI 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V8HI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "multi")]) + +(define_insn_and_split "*pushv16qi" + [(set (match_operand:V16QI 0 "push_operand" "=<") + (match_operand:V16QI 1 "nonmemory_operand" "x"))] + "TARGET_SSE2" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V16QI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "multi")]) (define_insn_and_split "*pushv4sf" [(set (match_operand:V4SF 0 "push_operand" "=<") @@ -17982,7 +18211,7 @@ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:V4SF (reg:SI 7)) (match_dup 1))] "" - [(set_attr "type" "sse")]) + [(set_attr "type" "multi")]) (define_insn_and_split "*pushv4si" [(set (match_operand:V4SI 0 "push_operand" "=<") @@ -17993,7 +18222,7 @@ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) (set (mem:V4SI (reg:SI 7)) (match_dup 1))] "" - [(set_attr "type" 
"sse")]) + [(set_attr "type" "multi")]) (define_insn_and_split "*pushv2si" [(set (match_operand:V2SI 0 "push_operand" "=<") @@ -18041,17 +18270,19 @@ (define_insn "movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:TI 1 "general_operand" "O,xm,x"))] - "TARGET_SSE && !TARGET_64BIT" + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ xorps\t%0, %0 movaps\t{%1, %0|%0, %1} movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemov,ssemov,ssemov") + (set_attr "mode" "V4SF")]) (define_insn "*movti_rex64" [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] + (match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ @@ -18060,8 +18291,8 @@ xorps\t%0, %0 movaps\\t{%1, %0|%0, %1} movaps\\t{%1, %0|%0, %1}" - [(set_attr "type" "*,*,sse,sse,sse") - (set_attr "mode" "TI")]) + [(set_attr "type" "*,*,ssemov,ssemov,ssemov") + (set_attr "mode" "V4SF")]) (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -18073,74 +18304,116 @@ ;; These two patterns are useful for specifying exactly whether to use ;; movaps or movups -(define_insn "sse_movaps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))] +(define_expand "sse_movaps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVA))] "TARGET_SSE" - "@ - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movaps (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + 
DONE; + } +}) -(define_insn "sse_movups" +(define_insn "*sse_movaps_1" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))] + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVA))] + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movaps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov,ssemov") + (set_attr "mode" "V4SF")]) + +(define_expand "sse_movups" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVU))] "TARGET_SSE" - "@ - movups\t{%1, %0|%0, %1} - movups\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movups (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + DONE; + } +}) +(define_insn "*sse_movups_1" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movups\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssecvt") + (set_attr "mode" "V4SF")]) ;; SSE Strange Moves. 
(define_insn "sse_movmskps" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))] + (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] + UNSPEC_MOVMSK))] "TARGET_SSE" "movmskps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "mmx_pmovmskb" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))] + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] + UNSPEC_MOVMSK))] "TARGET_SSE || TARGET_3DNOW_A" "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + (define_insn "mmx_maskmovq" [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] 32))] + (match_operand:V8QI 2 "register_operand" "y")] + UNSPEC_MASKMOV))] "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT" ;; @@@ check ordering of operands in intel/nonintel syntax "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_maskmovq_rex" [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D")) (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") - (match_operand:V8QI 2 "register_operand" "y")] 32))] + (match_operand:V8QI 2 "register_operand" "y")] + UNSPEC_MASKMOV))] "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT" ;; @@@ check ordering of operands in intel/nonintel syntax "maskmovq\t{%2, %1|%1, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "sse_movntv4sf" [(set (match_operand:V4SF 0 "memory_operand" "=m") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))] + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] + UNSPEC_MOVNT))] "TARGET_SSE" "movntps\t{%1, %0|%0, %1}" - 
[(set_attr "type" "sse")]) + [(set_attr "type" "ssemov") + (set_attr "mode" "V4SF")]) (define_insn "sse_movntdi" [(set (match_operand:DI 0 "memory_operand" "=m") - (unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))] + (unspec:DI [(match_operand:DI 1 "register_operand" "y")] + UNSPEC_MOVNT))] "TARGET_SSE || TARGET_3DNOW_A" "movntq\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxmov") + (set_attr "mode" "DI")]) (define_insn "sse_movhlps" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18154,7 +18427,8 @@ (const_int 3)))] "TARGET_SSE" "movhlps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "sse_movlhps" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18168,7 +18442,8 @@ (const_int 12)))] "TARGET_SSE" "movlhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "sse_movhps" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") @@ -18179,7 +18454,8 @@ "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" "movhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "sse_movlps" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") @@ -18190,17 +18466,29 @@ "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" "movlps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) -(define_insn "sse_loadss" +(define_expand "sse_loadss" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")] + "TARGET_SSE" +{ + emit_insn (gen_sse_loadss_1 (operands[0], operands[1], + CONST0_RTX (V4SFmode))); + DONE; +}) + +(define_insn "sse_loadss_1" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (match_operand:V4SF 1 "memory_operand" "m") - (vec_duplicate:V4SF (float:SF 
(const_int 0))) + (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m")) + (match_operand:V4SF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) (define_insn "sse_movss" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18210,7 +18498,8 @@ (const_int 1)))] "TARGET_SSE" "movss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) (define_insn "sse_storess" [(set (match_operand:SF 0 "memory_operand" "=m") @@ -18219,17 +18508,20 @@ (parallel [(const_int 0)])))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemov") + (set_attr "mode" "SF")]) (define_insn "sse_shufps" [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "nonimmediate_operand" "xm") - (match_operand:SI 3 "immediate_operand" "i")] 41))] + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_SHUFFLE))] "TARGET_SSE" ;; @@@ check operand order for intel/nonintel syntax "shufps\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) ;; SSE arithmetic @@ -18240,7 +18532,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "addps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) (define_insn "vmaddv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18251,7 +18544,8 @@ (const_int 1)))] "TARGET_SSE" "addss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) (define_insn "subv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18259,7 +18553,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "subps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sseadd") + (set_attr 
"mode" "V4SF")]) (define_insn "vmsubv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18270,7 +18565,8 @@ (const_int 1)))] "TARGET_SSE" "subss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sseadd") + (set_attr "mode" "SF")]) (define_insn "mulv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18278,7 +18574,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "mulps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemul") + (set_attr "mode" "V4SF")]) (define_insn "vmmulv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18289,7 +18586,8 @@ (const_int 1)))] "TARGET_SSE" "mulss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssemul") + (set_attr "mode" "SF")]) (define_insn "divv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18297,7 +18595,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "divps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssediv") + (set_attr "mode" "V4SF")]) (define_insn "vmdivv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18308,7 +18607,8 @@ (const_int 1)))] "TARGET_SSE" "divss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssediv") + (set_attr "mode" "SF")]) ;; SSE square root/reciprocal @@ -18316,45 +18616,52 @@ (define_insn "rcpv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))] + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] "TARGET_SSE" "rcpps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmrcpv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42) + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RCP) (match_operand:V4SF 2 "register_operand" 
"0") (const_int 1)))] "TARGET_SSE" "rcpss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) (define_insn "rsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF - [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))] + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] "TARGET_SSE" "rsqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmrsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43) + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RSQRT) (match_operand:V4SF 2 "register_operand" "0") (const_int 1)))] "TARGET_SSE" "rsqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) (define_insn "sqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] "TARGET_SSE" "sqrtps\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmsqrtv4sf2" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18364,206 +18671,358 @@ (const_int 1)))] "TARGET_SSE" "sqrtss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) ;; SSE logical operations. +;; SSE defines logical operations on floating point values. This brings +;; interesting challenge to RTL representation where logicals are only valid +;; on integral types. We deal with this by representing the floating point +;; logical as logical on arguments casted to TImode as this is what hardware +;; really does. Unfortunately hardware requires the type information to be +;; present and thus we must avoid subregs from being simplified and elliminated +;; in later compilation phases. 
+;; +;; We have the following variants of each instruction: +;; sse_andsf3 - the operation taking V4SF vector operands +;; and doing TImode cast on them +;; *sse_andsf3_memory - the operation taking one memory operand cast to +;; TImode, since the backend insists on eliminating casts +;; on memory operands +;; sse_andti3_sf_1 - the operation taking SF scalar operands. +;; We cannot accept a memory operand here, as the instruction reads +;; the whole scalar. This is generated only post reload by GCC +;; scalar float operations that expand to logicals (fabs) +;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode +;; memory operand. Eventually combine may be able +;; to synthesize these using a splitter. +;; sse2_anddf3, *sse2_anddf3_memory +;; +;; ;; These are not called andti3 etc. because we really really don't want ;; the compiler to widen DImode ands to TImode ands and then try to move ;; into DImode subregs of SSE registers, and them together, and move out ;; of DImode subregs again!
+;; SSE1 single precision floating point logical operation +(define_expand "sse_andv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (and:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") -(define_insn "*sse_andti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_andti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_insn "*sse_andv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sf_1" +(define_insn "*sse_andsf3" [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) + +(define_expand "sse_nandv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (and:TI 
(not:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") + +(define_insn "*sse_nandv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "andnps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sf_2" +(define_insn "*sse_nandsf3" [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" - "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "andnps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "sse_andti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") +(define_expand "sse_iorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (ior:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE" + "") + +(define_insn "*sse_iorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "andps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "orps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_andti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (match_operand:TI 1 "nonimmediate_operand" 
"%0") +(define_insn "*sse_iorsf3" + [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE2 + "TARGET_SSE && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "orps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_nandti3_df" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" - "andnpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_expand "sse_xorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0) + (xor:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "") + +(define_insn "*sse_xorv4sf3" + [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V4SF")]) -(define_insn "*sse_nandti3_sf" +(define_insn "*sse_xorsf3" [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (and:TI (not:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "andnps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "TARGET_SSE + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xorps\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + 
(set_attr "mode" "V4SF")]) -(define_insn "sse_nandti3" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) +;; SSE2 double precision floating point logical operation + +(define_expand "sse2_andv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE2" + "") + +(define_insn "*sse2_andv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2" - "andnps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_nandti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") - (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) +(define_insn "*sse2_anddf3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "andpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) + +(define_expand "sse2_nandv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE2" - "pnand\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "") -(define_insn "*sse_iorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI 
(match_operand:DF 2 "register_operand" "Y") 0)))] +(define_insn "*sse2_nandv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "andnpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_df_2" +(define_insn "*sse_nandti3_df" [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) (match_operand:TI 2 "nonimmediate_operand" "Ym")))] "TARGET_SSE2" - "orpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - -(define_insn "*sse_iorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "andnpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +(define_expand "sse2_iorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] + "TARGET_SSE2" + "") -(define_insn "sse_iorti3" - [(set (match_operand:TI 0 "register_operand" "=x") +(define_insn "*sse2_iorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") 
(match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 + "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "orps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "orpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_iorti3_sse2" - [(set (match_operand:TI 0 "register_operand" "=x") +(define_insn "*sse2_iordf3" + [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "por\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "orpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_xorti3_df_1" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))] +(define_expand "sse2_xorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0) + (xor:TI (subreg:TI (match_operand:V2DF 1 "nonimmediate_operand" "") 0) + (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))] "TARGET_SSE2" + "") + +(define_insn "*sse2_xorv2df3" + [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_xorti3_df_2" - [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0) - (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0) - (match_operand:TI 2 "nonimmediate_operand" "Ym")))] - "TARGET_SSE2" +(define_insn "*sse2_xordf3" + [(set 
(subreg:TI (match_operand:DF 0 "register_operand" "=x") 0) + (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "xorpd\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sselog") + (set_attr "mode" "V2DF")]) -(define_insn "*sse_xorti3_sf_1" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0) - (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) +;; SSE2 integral logicals. These patterns must always come after floating +;; point ones since we don't want compiler to use integer opcodes on floating +;; point SSE values to avoid matching of subregs in the match_operand. +(define_insn "*sse2_andti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_sf_2" - [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0) - (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0) +(define_insn "sse2_andv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "*sse2_nandti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) (match_operand:TI 2 "nonimmediate_operand" 
"xm")))] - "TARGET_SSE" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "TARGET_SSE2" + "pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) -(define_insn "sse_xorti3" +(define_insn "sse2_nandv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0")) + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "*sse2_iorti3" [(set (match_operand:TI 0 "register_operand" "=x") - (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") + (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] - "TARGET_SSE && !TARGET_SSE2 + "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - "xorps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_iorv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) -(define_insn "*sse_xorti3_sse2" +(define_insn "*sse2_xorti3" [(set (match_operand:TI 0 "register_operand" "=x") (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0") (match_operand:TI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) + +(define_insn "sse2_xorv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (xor:V2DI 
(match_operand:V2DI 1 "nonimmediate_operand" "%0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2 + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "pxor\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog") + (set_attr "mode" "TI")]) ;; Use xor, but don't show input operands so they aren't live before ;; this insn. (define_insn "sse_clrv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(const_int 0)] 45))] + (unspec:V4SF [(const_int 0)] UNSPEC_NOP))] "TARGET_SSE" "xorps\t{%0, %0|%0, %0}" - [(set_attr "type" "sse") - (set_attr "memory" "none")]) + [(set_attr "type" "sselog") + (set_attr "memory" "none") + (set_attr "mode" "V4SF")]) + +;; Use xor, but don't show input operands so they aren't live before +;; this insn. +(define_insn "sse_clrv2df" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(const_int 0)] UNSPEC_NOP))] + "TARGET_SSE2" + "xorpd\t{%0, %0|%0, %0}" + [(set_attr "type" "sselog") + (set_attr "memory" "none") + (set_attr "mode" "V2DF")]) ;; SSE mask-generating compares @@ -18574,7 +19033,8 @@ (match_operand:V4SF 2 "register_operand" "x")]))] "TARGET_SSE" "cmp%D3ps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "V4SF")]) (define_insn "maskncmpv4sf3" [(set (match_operand:V4SI 0 "register_operand" "=x") @@ -18589,7 +19049,8 @@ else return "cmpn%D3ps\t{%2, %0|%0, %2}"; } - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "V4SF")]) (define_insn "vmmaskcmpv4sf3" [(set (match_operand:V4SI 0 "register_operand" "=x") @@ -18597,11 +19058,12 @@ (match_operator:V4SI 3 "sse_comparison_operator" [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "register_operand" "x")]) - (match_dup 1) + (subreg:V4SI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE" "cmp%D3ss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) (define_insn "vmmaskncmpv4sf3" 
[(set (match_operand:V4SI 0 "register_operand" "=x") @@ -18619,33 +19081,34 @@ else return "cmpn%D3ss\t{%2, %0|%0, %2}"; } - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) (define_insn "sse_comi" [(set (reg:CCFP 17) - (match_operator:CCFP 2 "sse_comparison_operator" - [(vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] + (compare:CCFP (vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] "TARGET_SSE" "comiss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) (define_insn "sse_ucomi" [(set (reg:CCFPU 17) - (match_operator:CCFPU 2 "sse_comparison_operator" - [(vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] + (compare:CCFPU (vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] "TARGET_SSE" "ucomiss\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) ;; SSE unpack @@ -18666,7 +19129,8 @@ (const_int 5)))] "TARGET_SSE" "unpckhps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "sse_unpcklps" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18684,7 +19148,8 @@ (const_int 5)))] "TARGET_SSE" "unpcklps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) ;; SSE min/max @@ -18695,7 +19160,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" 
"maxps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmsmaxv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18706,7 +19172,8 @@ (const_int 1)))] "TARGET_SSE" "maxss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) (define_insn "sminv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18714,7 +19181,8 @@ (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] "TARGET_SSE" "minps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) (define_insn "vmsminv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18725,8 +19193,8 @@ (const_int 1)))] "TARGET_SSE" "minss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) - + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) ;; SSE <-> integer/MMX conversions @@ -18739,7 +19207,8 @@ (const_int 12)))] "TARGET_SSE" "cvtpi2ps\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "cvtps2pi" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -18748,16 +19217,19 @@ (parallel [(const_int 0) (const_int 1)])))] "TARGET_SSE" "cvtps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) (define_insn "cvttps2pi" [(set (match_operand:V2SI 0 "register_operand" "=y") (vec_select:V2SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX) (parallel [(const_int 0) (const_int 1)])))] "TARGET_SSE" "cvttps2pi\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) (define_insn "cvtsi2ss" [(set (match_operand:V4SF 0 "register_operand" "=x") @@ -18768,7 +19240,21 @@ (const_int 14)))] "TARGET_SSE" "cvtsi2ss\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + 
(set_attr "mode" "SF")]) + +(define_insn "cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0,0") + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 14)))] + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) (define_insn "cvtss2si" [(set (match_operand:SI 0 "register_operand" "=r") @@ -18777,16 +19263,42 @@ (parallel [(const_int 0)])))] "TARGET_SSE" "cvtss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + +(define_insn "cvtss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) + (parallel [(const_int 0)])))] + "TARGET_SSE && TARGET_64BIT" + "cvtss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) (define_insn "cvttss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX) (parallel [(const_int 0)])))] "TARGET_SSE" "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sse")]) + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + +(define_insn "cvttss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] + UNSPEC_FIX) + (parallel [(const_int 0)])))] + "TARGET_SSE && TARGET_64BIT" + "cvttss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,vector")]) ;; MMX insns @@ -18795,59 +19307,77 @@ (define_insn "addv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (plus:V8QI (match_operand:V8QI 1 
"register_operand" "0") + (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "addv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "addv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI (match_operand:V2SI 1 "register_operand" "0") + (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0") (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_adddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(plus:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "paddq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "ssaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "ssaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsw\t{%2, 
%0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "usaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "usaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "subv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -18855,7 +19385,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "subv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18863,7 +19394,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "subv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -18871,7 +19403,19 @@ (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +(define_insn "mmx_subdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "psubq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + 
(set_attr "mode" "DI")]) (define_insn "sssubv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -18879,7 +19423,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubsb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "sssubv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18887,7 +19432,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "ussubv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -18895,7 +19441,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubusb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "ussubv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18903,7 +19450,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "psubusw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "mulv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18911,7 +19459,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pmullw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) (define_insn "smulv4hi3_highpart" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18924,7 +19473,8 @@ (const_int 16))))] "TARGET_MMX" "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) (define_insn "umulv4hi3_highpart" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -18937,7 +19487,8 @@ (const_int 16))))] "TARGET_SSE || TARGET_3DNOW_A" "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) (define_insn "mmx_pmaddwd" [(set 
(match_operand:V2SI 0 "register_operand" "=y") @@ -18958,7 +19509,8 @@ (const_int 3)]))))))] "TARGET_MMX" "pmaddwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) ;; MMX logical operations @@ -18968,49 +19520,58 @@ (define_insn "mmx_iordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(ior:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + [(ior:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] "TARGET_MMX" "por\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "mmx_xordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(xor:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + [(xor:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] "TARGET_MMX" "pxor\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx") + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI") (set_attr "memory" "none")]) ;; Same as pxor, but don't show input operands so that we don't think ;; they are live. 
(define_insn "mmx_clrdi" [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(const_int 0)] 45))] + (unspec:DI [(const_int 0)] UNSPEC_NOP))] "TARGET_MMX" "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "mmx") + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI") (set_attr "memory" "none")]) (define_insn "mmx_anddi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(and:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + [(and:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] "TARGET_MMX" "pand\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "mmx_nanddi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0")) - (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] "TARGET_MMX" "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) ;; MMX unsigned averages/sum of absolute differences @@ -19032,7 +19593,8 @@ (const_int 1)))] "TARGET_SSE || TARGET_3DNOW_A" "pavgb\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_uavgv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19047,15 +19609,18 @@ (const_int 1)))] "TARGET_SSE || TARGET_3DNOW_A" "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_psadbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))] + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(match_operand:V8QI 1 "register_operand" "0") + 
(match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PSADBW))] "TARGET_SSE || TARGET_3DNOW_A" "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) ;; MMX insert/extract/shuffle @@ -19068,7 +19633,8 @@ (match_operand:SI 3 "immediate_operand" "i")))] "TARGET_SSE || TARGET_3DNOW_A" "pinsrw\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_pextrw" [(set (match_operand:SI 0 "register_operand" "=r") @@ -19077,15 +19643,18 @@ [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_SSE || TARGET_3DNOW_A" "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_pshufw" [(set (match_operand:V4HI 0 "register_operand" "=y") (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0") - (match_operand:SI 2 "immediate_operand" "i")] 41))] + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_SHUFFLE))] "TARGET_SSE || TARGET_3DNOW_A" "pshufw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) ;; MMX mask-generating comparisons @@ -19096,7 +19665,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpeqb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn "eqv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19104,7 +19674,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpeqw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn "eqv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19112,7 +19683,8 @@ (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpeqd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn 
"gtv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19120,7 +19692,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpgtb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn "gtv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19128,7 +19701,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpgtw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) (define_insn "gtv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19136,7 +19710,8 @@ (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "pcmpgtd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) ;; MMX max/min insns @@ -19147,7 +19722,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_SSE || TARGET_3DNOW_A" "pmaxub\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "smaxv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19155,7 +19731,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_SSE || TARGET_3DNOW_A" "pmaxsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "uminv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19163,7 +19740,8 @@ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_SSE || TARGET_3DNOW_A" "pminub\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) (define_insn "sminv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19171,7 +19749,8 @@ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_SSE || TARGET_3DNOW_A" "pminsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sse")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) ;; MMX shifts @@ -19182,7 
+19761,8 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psraw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "ashrv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19190,7 +19770,8 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psrad\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "lshrv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19198,7 +19779,8 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psrlw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "lshrv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19206,17 +19788,20 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psrld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) ;; See logical MMX insns. 
(define_insn "mmx_lshrdi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))] + (match_operand:DI 2 "nonmemory_operand" "yi"))] + UNSPEC_NOP))] "TARGET_MMX" "psrlq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "ashlv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19224,7 +19809,8 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "psllw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "ashlv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19232,17 +19818,20 @@ (match_operand:DI 2 "nonmemory_operand" "yi")))] "TARGET_MMX" "pslld\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) ;; See logical MMX insns. (define_insn "mmx_ashldi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI [(ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))] + (match_operand:DI 2 "nonmemory_operand" "yi"))] + UNSPEC_NOP))] "TARGET_MMX" "psllq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) ;; MMX pack/unpack insns. 
@@ -19254,7 +19843,8 @@ (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] "TARGET_MMX" "packsswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_packssdw" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19263,7 +19853,8 @@ (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] "TARGET_MMX" "packssdw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_packuswb" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19272,7 +19863,8 @@ (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] "TARGET_MMX" "packuswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) (define_insn "mmx_punpckhbw" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19298,7 +19890,8 @@ (const_int 85)))] "TARGET_MMX" "punpckhbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpckhwd" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19316,21 +19909,21 @@ (const_int 5)))] "TARGET_MMX" "punpckhwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpckhdq" [(set (match_operand:V2SI 0 "register_operand" "=y") (vec_merge:V2SI - (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") - (parallel [(const_int 0) - (const_int 1)])) + (match_operand:V2SI 1 "register_operand" "0") (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1) (const_int 0)])) (const_int 1)))] "TARGET_MMX" "punpckhdq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpcklbw" [(set (match_operand:V8QI 0 "register_operand" "=y") @@ -19356,7 +19949,8 @@ (const_int 85)))] "TARGET_MMX" "punpcklbw\t{%2, %0|%0, %2}" 
- [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpcklwd" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19374,7 +19968,8 @@ (const_int 5)))] "TARGET_MMX" "punpcklwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) (define_insn "mmx_punpckldq" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19382,19 +19977,18 @@ (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") (parallel [(const_int 1) (const_int 0)])) - (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") - (parallel [(const_int 0) - (const_int 1)])) + (match_operand:V2SI 2 "register_operand" "y") (const_int 1)))] "TARGET_MMX" "punpckldq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) ;; Miscellaneous stuff (define_insn "emms" - [(unspec_volatile [(const_int 0)] 31) + [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) (clobber (reg:XF 8)) (clobber (reg:XF 9)) (clobber (reg:XF 10)) @@ -19417,23 +20011,24 @@ (set_attr "memory" "unknown")]) (define_insn "ldmxcsr" - [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)] - "TARGET_MMX" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] + UNSPECV_LDMXCSR)] + "TARGET_SSE" "ldmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "load")]) (define_insn "stmxcsr" [(set (match_operand:SI 0 "memory_operand" "=m") - (unspec_volatile:SI [(const_int 0)] 40))] - "TARGET_MMX" + (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] + "TARGET_SSE" "stmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "store")]) (define_expand "sfence" [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] 44))] + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] "TARGET_SSE || TARGET_3DNOW_A" { operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); @@ -19442,7 +20037,7 @@ (define_insn "*sfence_insn" [(set 
(match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] 44))] + (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] "TARGET_SSE || TARGET_3DNOW_A" "sfence" [(set_attr "type" "sse") @@ -19457,7 +20052,7 @@ (reg:DI 25) (reg:DI 26) (reg:DI 27) - (reg:DI 28)] 13)) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 "register_operand" "")) (use (match_operand:DI 2 "immediate_operand" "")) (use (label_ref:DI (match_operand 3 "" "")))])] @@ -19474,7 +20069,7 @@ (reg:DI 25) (reg:DI 26) (reg:DI 27) - (reg:DI 28)] 13)) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) (use (match_operand:DI 1 "register_operand" "r")) (use (match_operand:DI 2 "const_int_operand" "i")) (use (label_ref:DI (match_operand 3 "" "X")))] @@ -19517,7 +20112,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfadd\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "subv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19525,7 +20121,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfsub\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "subrv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19533,7 +20130,8 @@ (match_operand:V2SF 1 "register_operand" "0")))] "TARGET_3DNOW" "pfsubr\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "gtv2sf3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19541,7 +20139,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfcmpgt\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) (define_insn "gev2sf3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19549,7 +20148,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfcmpge\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + 
[(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) (define_insn "eqv2sf3" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19557,7 +20157,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfcmpeq\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcmp") + (set_attr "mode" "V2SF")]) (define_insn "pfmaxv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19565,7 +20166,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfmax\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "pfminv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19573,7 +20175,8 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfmin\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "mulv2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19581,10 +20184,11 @@ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pfmul\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "V2SF")]) (define_insn "femms" - [(unspec_volatile [(const_int 0)] 46) + [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) (clobber (reg:XF 8)) (clobber (reg:XF 9)) (clobber (reg:XF 10)) @@ -19603,14 +20207,16 @@ (clobber (reg:DI 36))] "TARGET_3DNOW" "femms" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) (define_insn "pf2id" [(set (match_operand:V2SI 0 "register_operand" "=y") (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pf2id\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) (define_insn "pf2iw" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19619,7 +20225,8 @@ (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))] "TARGET_3DNOW_A" "pf2iw\\t{%1, 
%0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) (define_insn "pfacc" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19636,7 +20243,8 @@ (parallel [(const_int 1)])))))] "TARGET_3DNOW" "pfacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "pfnacc" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19653,7 +20261,8 @@ (parallel [(const_int 1)])))))] "TARGET_3DNOW_A" "pfnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "pfpnacc" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19670,7 +20279,8 @@ (parallel [(const_int 1)])))))] "TARGET_3DNOW_A" "pfpnacc\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxadd") + (set_attr "mode" "V2SF")]) (define_insn "pi2fw" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19686,14 +20296,16 @@ (parallel [(const_int 1)])))))))] "TARGET_3DNOW_A" "pi2fw\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) (define_insn "floatv2si2" [(set (match_operand:V2SF 0 "register_operand" "=y") (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] "TARGET_3DNOW" "pi2fd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "V2SF")]) ;; This insn is identical to pavgb in operation, but the opcode is ;; different. To avoid accidentally matching pavgb, use an unspec. 
@@ -19702,50 +20314,62 @@ [(set (match_operand:V8QI 0 "register_operand" "=y") (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 49))] + (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + UNSPEC_PAVGUSB))] "TARGET_3DNOW" "pavgusb\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxshft") + (set_attr "mode" "TI")]) ;; 3DNow reciprical and sqrt (define_insn "pfrcpv2sf2" [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 50))] + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRCP))] "TARGET_3DNOW" "pfrcp\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "mode" "TI")]) (define_insn "pfrcpit1v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 51))] + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT1))] "TARGET_3DNOW" "pfrcpit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "mode" "TI")]) (define_insn "pfrcpit2v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 52))] + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRCPIT2))] "TARGET_3DNOW" "pfrcpit2\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "mode" "TI")]) (define_insn "pfrsqrtv2sf2" [(set (match_operand:V2SF 0 "register_operand" "=y") - (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 53))] + (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQRT))] "TARGET_3DNOW" - "pfrsqrt\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + "pfrsqrt\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx") + (set_attr "mode" 
"TI")]) (define_insn "pfrsqit1v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 54))] + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + UNSPEC_PFRSQIT1))] "TARGET_3DNOW" "pfrsqit1\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "mode" "TI")]) (define_insn "pmulhrwv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") @@ -19764,7 +20388,8 @@ (const_int 16))))] "TARGET_3DNOW" "pmulhrw\\t{%2, %0|%0, %2}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "TI")]) (define_insn "pswapdv2si2" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -19772,7 +20397,8 @@ (parallel [(const_int 1) (const_int 0)])))] "TARGET_3DNOW_A" "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "TI")]) (define_insn "pswapdv2sf2" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -19780,7 +20406,8 @@ (parallel [(const_int 1) (const_int 0)])))] "TARGET_3DNOW_A" "pswapd\\t{%1, %0|%0, %1}" - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmxcvt") + (set_attr "mode" "TI")]) (define_expand "prefetch" [(prefetch (match_operand 0 "address_operand" "") @@ -19843,7 +20470,8 @@ return patterns[locality]; } - [(set_attr "type" "sse")]) + [(set_attr "type" "sse") + (set_attr "memory" "none")]) (define_insn "*prefetch_3dnow" [(prefetch (match_operand:SI 0 "address_operand" "p") @@ -19870,4 +20498,1577 @@ else return "prefetchw\t%a0"; } - [(set_attr "type" "mmx")]) + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) + +;; SSE2 support + +(define_insn "addv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (plus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "addpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + 
+(define_insn "vmaddv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (plus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "addsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "subv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "subpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "vmsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (minus:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "subsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "mulv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "mulpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemul") + (set_attr "mode" "V2DF")]) + +(define_insn "vmmulv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (mult:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "mulsd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssemul") + (set_attr "mode" "DF")]) + +(define_insn "divv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (div:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "divpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssediv") + (set_attr "mode" "V2DF")]) + +(define_insn "vmdivv2df3" + [(set (match_operand:V2DF 0 
"register_operand" "=x") + (vec_merge:V2DF (div:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "divsd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssediv") + (set_attr "mode" "DF")]) + +;; SSE min/max + +(define_insn "smaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "maxpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "vmsmaxv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (smax:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "maxsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) + +(define_insn "sminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "minpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "vmsminv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (smin:V2DF (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE2" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "DF")]) +;; SSE2 square root. There doesn't appear to be an extension for the +;; reciprocal/rsqrt instructions if the Intel manual is to be believed. 
+ +(define_insn "sqrtv2df2" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")))] + "TARGET_SSE2" + "sqrtpd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V2DF")]) + +(define_insn "vmsqrtv2df2" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")) + (match_operand:V2DF 2 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE2" + "sqrtsd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +;; SSE mask-generating compares + +(define_insn "maskcmpv2df3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (match_operator:V2DI 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "x")]))] + "TARGET_SSE2" + "cmp%D3pd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "V2DF")]) + +(define_insn "maskncmpv2df3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (not:V2DI + (match_operator:V2DI 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "x")])))] + "TARGET_SSE2" +{ + if (GET_CODE (operands[3]) == UNORDERED) + return "cmpordps\t{%2, %0|%0, %2}"; + else + return "cmpn%D3pd\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "ssecmp") + (set_attr "mode" "V2DF")]) + +(define_insn "vmmaskcmpv2df3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (match_operator:V2DI 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "x")]) + (subreg:V2DI (match_dup 1) 0) + (const_int 1)))] + "TARGET_SSE2" + "cmp%D3sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +(define_insn "vmmaskncmpv2df3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (not:V2DI + (match_operator:V2DI 3 
"sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "x")])) + (subreg:V2DI (match_dup 1) 0) + (const_int 1)))] + "TARGET_SSE2" +{ + if (GET_CODE (operands[3]) == UNORDERED) + return "cmpordsd\t{%2, %0|%0, %2}"; + else + return "cmpn%D3sd\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +(define_insn "sse2_comi" + [(set (reg:CCFP 17) + (compare:CCFP (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "comisd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +(define_insn "sse2_ucomi" + [(set (reg:CCFPU 17) + (compare:CCFPU (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "ucomisd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + +;; SSE Strange Moves. 
+ +(define_insn "sse2_movmskpd" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "TARGET_SSE2" + "movmskpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] + UNSPEC_MOVMSK))] + "TARGET_SSE2" + "pmovmskb\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_maskmovdqu" + [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D")) + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") + (match_operand:V16QI 2 "register_operand" "x")] + UNSPEC_MASKMOV))] + "TARGET_SSE2" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovdqu\t{%2, %1|%1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_maskmovdqu_rex64" + [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") + (match_operand:V16QI 2 "register_operand" "x")] + UNSPEC_MASKMOV))] + "TARGET_SSE2" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovdqu\t{%2, %1|%1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movntv2df" + [(set (match_operand:V2DF 0 "memory_operand" "=m") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movntpd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movntv2di" + [(set (match_operand:V2DI 0 "memory_operand" "=m") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movntdq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movntsi" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 
"register_operand" "r")] + UNSPEC_MOVNT))] + "TARGET_SSE2" + "movnti\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +;; SSE <-> integer/MMX conversions + +;; Conversions between SI and SF + +(define_insn "cvtdq2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvtdq2ps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "cvtps2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvtps2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "cvttps2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX))] + "TARGET_SSE2" + "cvttps2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;; Conversions between SI and DF + +(define_insn "cvtdq2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float:V2DF (vec_select:V2SI + (match_operand:V4SI 1 "nonimmediate_operand" "xm") + (parallel + [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE2" + "cvtdq2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "cvtpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_SSE2" + "cvtpd2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "cvttpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_concat:V4SI + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_SSE2" + "cvttpd2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + 
(set_attr "mode" "TI")]) + +(define_insn "cvtpd2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "cvtpd2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "cvttpd2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] + UNSPEC_FIX))] + "TARGET_SSE2" + "cvttpd2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "cvtpi2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))] + "TARGET_SSE2" + "cvtpi2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;; Conversions between SI and DF + +(define_insn "cvtsd2si" + [(set (match_operand:SI 0 "register_operand" "=r") + (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2" + "cvtsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SI")]) + +(define_insn "cvtsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SI")]) + +(define_insn "cvttsd2si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))] UNSPEC_FIX))] + "TARGET_SSE2" + "cvttsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SI")]) + +(define_insn "cvttsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") + (parallel [(const_int 0)]))] UNSPEC_FIX))] + "TARGET_SSE2 && TARGET_64BIT" + "cvttsd2siq\t{%1, 
%0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector,vector")]) + +(define_insn "cvtsi2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") + (vec_duplicate:V2DF + (float:DF + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 2)))] + "TARGET_SSE2" + "cvtsi2sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "cvtsi2sdq" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") + (vec_duplicate:V2DF + (float:DF + (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 2)))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsi2sdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "vector,direct")]) + +;; Conversions between SF and DF + +(define_insn "cvtsd2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand" "xm"))) + (const_int 14)))] + "TARGET_SSE2" + "cvtsd2ss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + +(define_insn "cvtss2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") + (float_extend:V2DF + (vec_select:V2SF + (match_operand:V4SF 2 "register_operand" "xm") + (parallel [(const_int 0) + (const_int 1)]))) + (const_int 2)))] + "TARGET_SSE2" + "cvtss2sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "cvtpd2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (subreg:V4SF + (vec_concat:V4SI + (subreg:V2SI (float_truncate:V2SF + (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 0) + (const_vector:V2SI [(const_int 0) (const_int 0)])) 0))] + "TARGET_SSE2" + "cvtpd2ps\t{%1, 
%0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V4SF")]) + +(define_insn "cvtps2pd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (float_extend:V2DF + (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 1)]))))] + "TARGET_SSE2" + "cvtps2pd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +;; SSE2 variants of MMX insns + +;; MMX arithmetic + +(define_insn "addv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "addv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "addv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "addv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssaddv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddsb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ssaddv8hi3" + [(set 
(match_operand:V8HI 0 "register_operand" "=x") + (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "usaddv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddusb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "usaddv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "paddusw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "subv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (minus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "subv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (minus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "subv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (minus:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "subv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (minus:V2DI (match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] + 
"TARGET_SSE2" + "psubq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sssubv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (ss_minus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubsb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sssubv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ss_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ussubv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (us_minus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubusb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "ussubv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "psubusw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "mulv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (mult:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pmullw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "smulv8hi3_highpart" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (mult:V8SI (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "0")) + (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_int 16))))] + "TARGET_SSE2" + "pmulhw\t{%2, %0|%0, %2}" + [(set_attr "type" 
"sseimul") + (set_attr "mode" "TI")]) + +(define_insn "umulv8hi3_highpart" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (truncate:V8HI + (lshiftrt:V8SI + (mult:V8SI (zero_extend:V8SI (match_operand:V8HI 1 "register_operand" "0")) + (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) + (const_int 16))))] + "TARGET_SSE2" + "pmulhuw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "sse2_umulsidi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (mult:DI (zero_extend:DI (vec_select:SI + (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0)]))) + (zero_extend:DI (vec_select:SI + (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0)])))))] + "TARGET_SSE2" + "pmuludq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "sse2_umulv2siv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (mult:V2DI (zero_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 2)]))) + (zero_extend:V2DI + (vec_select:V2SI + (match_operand:V4SI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) (const_int 2)])))))] + "TARGET_SSE2" + "pmuludq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseimul") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pmaddwd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)]))) + (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6)])))) + (mult:V4SI + (sign_extend:V4SI (vec_select:V4HI (match_dup 1) + (parallel [(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))) + (sign_extend:V4SI (vec_select:V4HI (match_dup 2) + (parallel 
[(const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)]))))))] + "TARGET_SSE2" + "pmaddwd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +;; Same as pxor, but don't show input operands so that we don't think +;; they are live. +(define_insn "sse2_clrti" + [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))] + "TARGET_SSE2" + "pxor\t{%0, %0|%0, %0}" + [(set_attr "type" "sseiadd") + (set_attr "memory" "none") + (set_attr "mode" "TI")]) + +;; MMX unsigned averages/sum of absolute differences + +(define_insn "sse2_uavgv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (ashiftrt:V16QI + (plus:V16QI (plus:V16QI + (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")) + (const_vector:V16QI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1)))] + "TARGET_SSE2" + "pavgb\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sse2_uavgv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ashiftrt:V8HI + (plus:V8HI (plus:V8HI + (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (const_vector:V8HI [(const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1) + (const_int 1) (const_int 1)])) + (const_int 1)))] + "TARGET_SSE2" + "pavgw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +;; @@@ this isn't the right representation. 
+(define_insn "sse2_psadbw" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")] + UNSPEC_PSADBW))] + "TARGET_SSE2" + "psadbw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + + +;; MMX insert/extract/shuffle + +(define_insn "sse2_pinsrw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0") + (vec_duplicate:V8HI + (truncate:HI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_SSE2" + "pinsrw\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pextrw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (vec_select:HI (match_operand:V8HI 1 "register_operand" "x") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SSE2" + "pextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pshufd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_SHUFFLE))] + "TARGET_SSE2" + "pshufd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pshuflw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_PSHUFLW))] + "TARGET_SSE2" + "pshuflw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_pshufhw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_PSHUFHW))] + "TARGET_SSE2" + "pshufhw\t{%2, %1, 
%0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;; MMX mask-generating comparisons + +(define_insn "eqv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (eq:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpeqb\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "eqv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (eq:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpeqw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "eqv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (eq:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpeqd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "gtv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (gt:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpgtb\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "gtv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (gt:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpgtw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + +(define_insn "gtv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (gt:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pcmpgtd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) + + +;; MMX max/min insns + +(define_insn "umaxv16qi3" + [(set (match_operand:V16QI 0 
"register_operand" "=x") + (umax:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pmaxub\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "smaxv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (smax:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pmaxsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "uminv16qi3" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (umin:V16QI (match_operand:V16QI 1 "register_operand" "0") + (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pminub\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + +(define_insn "sminv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (smin:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE2" + "pminsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + + +;; MMX shifts + +(define_insn "ashrv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psraw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashrv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psrad\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshrv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psrlw\t{%2, 
%0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshrv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psrld\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "lshrv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psrlq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashlv8hi3" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psllw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashlv4si3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "pslld\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashlv2di3" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") + (match_operand:TI 2 "nonmemory_operand" "xi")))] + "TARGET_SSE2" + "psllq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashrv8hi3_ti" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") + (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + "TARGET_SSE2" + "psraw\t{%2, %0|%0, %2}" + [(set_attr "type" "sseishft") + (set_attr "mode" "TI")]) + +(define_insn "ashrv4si3_ti" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (ashiftrt:V4SI 
(match_operand:V4SI 1 "register_operand" "0")
+                       (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+  "TARGET_SSE2"
+  "psrad\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; The *_ti shift patterns take their shift count as a V2DI operand that
+;; is viewed through a TImode subreg.
+(define_insn "lshrv8hi3_ti"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                       (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+  "TARGET_SSE2"
+  "psrlw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+(define_insn "lshrv4si3_ti"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                       (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+  "TARGET_SSE2"
+  "psrld\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+(define_insn "lshrv2di3_ti"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
+                       (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+  "TARGET_SSE2"
+  "psrlq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+(define_insn "ashlv8hi3_ti"
+  [(set (match_operand:V8HI 0 "register_operand" "=x")
+        (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
+                     (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+  "TARGET_SSE2"
+  "psllw\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+(define_insn "ashlv4si3_ti"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+        (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
+                     (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+  "TARGET_SSE2"
+  "pslld\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+(define_insn "ashlv2di3_ti"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
+                     (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+  "TARGET_SSE2"
+  "psllq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; See logical MMX insns for the reason for the unspec.  Strictly speaking
+;; we wouldn't need it here since we never generate TImode arithmetic.
+
+;; There has to be some kind of prize for the weirdest new instruction...
+;; The RTL multiplies the immediate in operand 2 by 8 to form the TImode
+;; shift amount.
+(define_insn "sse2_ashlti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (unspec:TI
+         [(ashift:TI (match_operand:TI 1 "register_operand" "0")
+                     (mult:SI (match_operand:SI 2 "immediate_operand" "i")
+                              (const_int 8)))] UNSPEC_NOP))]
+  "TARGET_SSE2"
+  "pslldq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; Logical right-shift counterpart of sse2_ashlti3; same scaling of the
+;; immediate by 8.
+(define_insn "sse2_lshrti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+        (unspec:TI
+         [(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+                       (mult:SI (match_operand:SI 2 "immediate_operand" "i")
+                                (const_int 8)))] UNSPEC_NOP))]
+  "TARGET_SSE2"
+  "psrldq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseishft")
+   (set_attr "mode" "TI")])
+
+;; SSE unpack
+
+;; unpckhpd concatenates element 1 of operand 1 with element 1 of
+;; operand 2.  Each vec_select extracts a single element, so its mode is
+;; the scalar DF rather than V2DF.
+(define_insn "sse2_unpckhpd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_concat:V2DF
+          (vec_select:DF (match_operand:V2DF 1 "register_operand" "0")
+                         (parallel [(const_int 1)]))
+          (vec_select:DF (match_operand:V2DF 2 "register_operand" "x")
+                         (parallel [(const_int 1)]))))]
+  "TARGET_SSE2"
+  "unpckhpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;; unpcklpd concatenates element 0 of operand 1 with element 0 of
+;; operand 2; as above, the selected elements are scalar DF values.
+(define_insn "sse2_unpcklpd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x")
+        (vec_concat:V2DF
+          (vec_select:DF (match_operand:V2DF 1 "register_operand" "0")
+                         (parallel [(const_int 0)]))
+          (vec_select:DF (match_operand:V2DF 2 "register_operand" "x")
+                         (parallel [(const_int 0)]))))]
+  "TARGET_SSE2"
+  "unpcklpd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
+;; MMX pack/unpack insns. 
+ +(define_insn "sse2_packsswb" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_concat:V16QI + (ss_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0")) + (ss_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))] + "TARGET_SSE2" + "packsswb\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_packssdw" + [(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_concat:V8HI + (ss_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0")) + (ss_truncate:V4HI (match_operand:V4SI 2 "register_operand" "x"))))] + "TARGET_SSE2" + "packssdw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_packuswb" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_concat:V16QI + (us_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0")) + (us_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))] + "TARGET_SSE2" + "packuswb\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhbw" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_merge:V16QI + (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0") + (parallel [(const_int 8) (const_int 0) + (const_int 9) (const_int 1) + (const_int 10) (const_int 2) + (const_int 11) (const_int 3) + (const_int 12) (const_int 4) + (const_int 13) (const_int 5) + (const_int 14) (const_int 6) + (const_int 15) (const_int 7)])) + (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x") + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11) + (const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])) + (const_int 21845)))] + "TARGET_SSE2" + "punpckhbw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhwd" + [(set (match_operand:V8HI 0 
"register_operand" "=x") + (vec_merge:V8HI + (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 4) (const_int 0) + (const_int 5) (const_int 1) + (const_int 6) (const_int 2) + (const_int 7) (const_int 3)])) + (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x") + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5) + (const_int 2) (const_int 6) + (const_int 3) (const_int 7)])) + (const_int 85)))] + "TARGET_SSE2" + "punpckhwd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhdq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 2) (const_int 0) + (const_int 3) (const_int 1)])) + (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x") + (parallel [(const_int 0) (const_int 2) + (const_int 1) (const_int 3)])) + (const_int 5)))] + "TARGET_SSE2" + "punpckhdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklbw" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (vec_merge:V16QI + (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 8) + (const_int 1) (const_int 9) + (const_int 2) (const_int 10) + (const_int 3) (const_int 11) + (const_int 4) (const_int 12) + (const_int 5) (const_int 13) + (const_int 6) (const_int 14) + (const_int 7) (const_int 15)])) + (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x") + (parallel [(const_int 8) (const_int 0) + (const_int 9) (const_int 1) + (const_int 10) (const_int 2) + (const_int 11) (const_int 3) + (const_int 12) (const_int 4) + (const_int 13) (const_int 5) + (const_int 14) (const_int 6) + (const_int 15) (const_int 7)])) + (const_int 21845)))] + "TARGET_SSE2" + "punpcklbw\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklwd" + 
[(set (match_operand:V8HI 0 "register_operand" "=x") + (vec_merge:V8HI + (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5) + (const_int 2) (const_int 6) + (const_int 3) (const_int 7)])) + (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x") + (parallel [(const_int 4) (const_int 0) + (const_int 5) (const_int 1) + (const_int 6) (const_int 2) + (const_int 7) (const_int 3)])) + (const_int 85)))] + "TARGET_SSE2" + "punpcklwd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckldq" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 2) + (const_int 1) (const_int 3)])) + (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x") + (parallel [(const_int 2) (const_int 0) + (const_int 3) (const_int 1)])) + (const_int 5)))] + "TARGET_SSE2" + "punpckldq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpcklqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") + (parallel [(const_int 1) + (const_int 0)])) + (match_operand:V2DI 1 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE2" + "punpcklqdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_punpckhqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_merge:V2DI + (match_operand:V2DI 1 "register_operand" "0") + (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") + (parallel [(const_int 1) + (const_int 0)])) + (const_int 1)))] + "TARGET_SSE2" + "punpckhqdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +;; SSE2 moves + +(define_insn "sse2_movapd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + 
(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVA))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movapd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movupd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movupd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movdqa" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVA))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqa\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movdqu" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] + UNSPEC_MOVU))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqu\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movdq2q" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y") + (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && !TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movdq2q_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r") + (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr 
"mode" "TI")]) + +(define_insn "sse2_movq2dq" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x") + (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y") + (const_int 0)))] + "TARGET_SSE2 && !TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movq2dq_rex64" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x") + (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r") + (const_int 0)))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssemov,ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (vec_concat:V2DI (vec_select:DI + (match_operand:V2DI 1 "nonimmediate_operand" "xm") + (parallel [(const_int 0)])) + (const_int 0)))] + "TARGET_SSE2" + "movq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_loadd" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr")) + (const_vector:V4SI [(const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)]) + (const_int 1)))] + "TARGET_SSE2" + "movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_stored" + [(set (match_operand:SI 0 "nonimmediate_operand" "=mr") + (vec_select:SI + (match_operand:V4SI 1 "register_operand" "x") + (parallel [(const_int 0)])))] + "TARGET_SSE2" + "movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "mode" "TI")]) + +(define_insn "sse2_movhpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (vec_merge:V2DF + (match_operand:V2DF 1 "nonimmediate_operand" "0,0") + (match_operand:V2DF 2 "nonimmediate_operand" "m,x") + (const_int 2)))] + "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE 
(operands[2]) == MEM)" + "movhpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_insn "sse2_movlpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") + (vec_merge:V2DF + (match_operand:V2DF 1 "nonimmediate_operand" "0,0") + (match_operand:V2DF 2 "nonimmediate_operand" "m,x") + (const_int 1)))] + "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" + "movlpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_expand "sse2_loadsd" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1], + CONST0_RTX (V2DFmode))); + DONE; +}) + +(define_insn "sse2_loadsd_1" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) + (match_operand:V2DF 2 "const0_operand" "X") + (const_int 1)))] + "TARGET_SSE2" + "movsd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "sse2_movsd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_merge:V2DF + (match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "register_operand" "x") + (const_int 1)))] + "TARGET_SSE2" + "movsd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "sse2_storesd" + [(set (match_operand:DF 0 "memory_operand" "=m") + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)])))] + "TARGET_SSE2" + "movsd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "sse2_shufpd" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_SHUFFLE))] + "TARGET_SSE2" + ;; @@@ check operand 
order for intel/nonintel syntax
+  "shufpd\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "V2DF")])
+
+;; Operand 0 is an address (address_operand, constraint "p"), not a MEM,
+;; so it must be printed with the %a modifier to be emitted as a memory
+;; reference; plain %0 would print the bare address expression.
+(define_insn "sse2_clflush"
+  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
+                    UNSPECV_CLFLUSH)]
+  "TARGET_SSE2"
+  "clflush\t%a0"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
+
+;; The expander builds operand 0 itself: a volatile BLKmode MEM whose
+;; address is a SCRATCH, which the pattern both sets and uses.
+(define_expand "sse2_mfence"
+  [(set (match_dup 0)
+        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+  "TARGET_SSE2"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+;; Matches the RTL produced by the sse2_mfence expander and emits mfence.
+(define_insn "*mfence_insn"
+  [(set (match_operand:BLK 0 "" "")
+        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+  "TARGET_SSE2"
+  "mfence"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")])
+
+;; Same construction as sse2_mfence, using UNSPEC_LFENCE.
+(define_expand "sse2_lfence"
+  [(set (match_dup 0)
+        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+  "TARGET_SSE2"
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+;; Matches the RTL produced by the sse2_lfence expander and emits lfence.
+(define_insn "*lfence_insn"
+  [(set (match_operand:BLK 0 "" "")
+        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+  "TARGET_SSE2"
+  "lfence"
+  [(set_attr "type" "sse")
+   (set_attr "memory" "unknown")]) |