summaryrefslogtreecommitdiffstats
path: root/contrib/gcc/config/i386/i386.md
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/gcc/config/i386/i386.md')
-rw-r--r--contrib/gcc/config/i386/i386.md5687
1 files changed, 3944 insertions, 1743 deletions
diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md
index 36a0497..1fa2998 100644
--- a/contrib/gcc/config/i386/i386.md
+++ b/contrib/gcc/config/i386/i386.md
@@ -1,5 +1,6 @@
;; GCC machine description for IA-32 and x86-64.
-;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003
;; Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
@@ -49,55 +50,77 @@
;; 'k' Likewise, print the SImode name of the register.
;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh.
;; 'y' Print "st(0)" instead of "st" as a register.
-;;
+
;; UNSPEC usage:
-;; 0 This is a `scas' operation. The mode of the UNSPEC is always SImode.
-;; operand 0 is the memory address to scan.
-;; operand 1 is a register containing the value to scan for. The mode
-;; of the scas opcode will be the same as the mode of this operand.
-;; operand 2 is the known alignment of operand 0.
-;; 1 This is a `sin' operation. The mode of the UNSPEC is MODE_FLOAT.
-;; operand 0 is the argument for `sin'.
-;; 2 This is a `cos' operation. The mode of the UNSPEC is MODE_FLOAT.
-;; operand 0 is the argument for `cos'.
-;; 3 This is part of a `stack probe' operation. The mode of the UNSPEC is
-;; always SImode. operand 0 is the size of the stack allocation.
-;; 4 This is the source of a fake SET of the frame pointer which is used to
-;; prevent insns referencing it being scheduled across the initial
-;; decrement of the stack pointer.
-;; 5 This is a `bsf' operation.
-;; 6 This is the @GOT offset of a PIC address.
-;; 7 This is the @GOTOFF offset of a PIC address.
-;; 8 This is a reference to a symbol's @PLT address.
-;; 9 This is an `fnstsw' operation.
-;; 10 This is a `sahf' operation.
-;; 11 This is a `fstcw' operation
-;; 12 This is behaviour of add when setting carry flag.
-;; 13 This is a `eh_return' placeholder.
-
-;; For SSE/MMX support:
-;; 30 This is `fix', guaranteed to be truncating.
-;; 31 This is a `emms' operation.
-;; 32 This is a `maskmov' operation.
-;; 33 This is a `movmsk' operation.
-;; 34 This is a `non-temporal' move.
-;; 36 This is used to distinguish COMISS from UCOMISS.
-;; 37 This is a `ldmxcsr' operation.
-;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
-;; 39 This is a forced `movups' instruction (rather than whatever movti does)
-;; 40 This is a `stmxcsr' operation.
-;; 41 This is a `shuffle' operation.
-;; 42 This is a `rcp' operation.
-;; 43 This is a `rsqsrt' operation.
-;; 44 This is a `sfence' operation.
-;; 45 This is a noop to prevent excessive combiner cleverness.
-;; 46 This is a `femms' operation.
-;; 49 This is a 'pavgusb' operation.
-;; 50 This is a `pfrcp' operation.
-;; 51 This is a `pfrcpit1' operation.
-;; 52 This is a `pfrcpit2' operation.
-;; 53 This is a `pfrsqrt' operation.
-;; 54 This is a `pfrsqrit1' operation.
+
+(define_constants
+ [; Relocation specifiers
+ (UNSPEC_GOT 0)
+ (UNSPEC_GOTOFF 1)
+ (UNSPEC_GOTPCREL 2)
+ (UNSPEC_GOTTPOFF 3)
+ (UNSPEC_TPOFF 4)
+ (UNSPEC_NTPOFF 5)
+ (UNSPEC_DTPOFF 6)
+ (UNSPEC_GOTNTPOFF 7)
+ (UNSPEC_INDNTPOFF 8)
+
+ ; Prologue support
+ (UNSPEC_STACK_PROBE 10)
+ (UNSPEC_STACK_ALLOC 11)
+ (UNSPEC_SET_GOT 12)
+ (UNSPEC_SSE_PROLOGUE_SAVE 13)
+
+ ; TLS support
+ (UNSPEC_TP 15)
+ (UNSPEC_TLS_GD 16)
+ (UNSPEC_TLS_LD_BASE 17)
+
+ ; Other random patterns
+ (UNSPEC_SCAS 20)
+ (UNSPEC_SIN 21)
+ (UNSPEC_COS 22)
+ (UNSPEC_BSF 23)
+ (UNSPEC_FNSTSW 24)
+ (UNSPEC_SAHF 25)
+ (UNSPEC_FSTCW 26)
+ (UNSPEC_ADD_CARRY 27)
+ (UNSPEC_FLDCW 28)
+
+ ; For SSE/MMX support:
+ (UNSPEC_FIX 30)
+ (UNSPEC_MASKMOV 32)
+ (UNSPEC_MOVMSK 33)
+ (UNSPEC_MOVNT 34)
+ (UNSPEC_MOVA 38)
+ (UNSPEC_MOVU 39)
+ (UNSPEC_SHUFFLE 41)
+ (UNSPEC_RCP 42)
+ (UNSPEC_RSQRT 43)
+ (UNSPEC_SFENCE 44)
+ (UNSPEC_NOP 45) ; prevents combiner cleverness
+ (UNSPEC_PAVGUSB 49)
+ (UNSPEC_PFRCP 50)
+ (UNSPEC_PFRCPIT1 51)
+ (UNSPEC_PFRCPIT2 52)
+ (UNSPEC_PFRSQRT 53)
+ (UNSPEC_PFRSQIT1 54)
+ (UNSPEC_PSHUFLW 55)
+ (UNSPEC_PSHUFHW 56)
+ (UNSPEC_MFENCE 59)
+ (UNSPEC_LFENCE 60)
+ (UNSPEC_PSADBW 61)
+ ])
+
+(define_constants
+ [(UNSPECV_BLOCKAGE 0)
+ (UNSPECV_EH_RETURN 13)
+ (UNSPECV_EMMS 31)
+ (UNSPECV_LDMXCSR 37)
+ (UNSPECV_STMXCSR 40)
+ (UNSPECV_FEMMS 46)
+ (UNSPECV_CLFLUSH 57)
+ ])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
;; from i386.c.
@@ -116,26 +139,44 @@
;; A basic instruction type. Refinements due to arguments to be
;; provided in other attributes.
(define_attr "type"
- "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx,fistp"
+ "other,multi,
+ alu,alu1,negnot,imov,imovx,lea,
+ incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+ icmp,test,ibr,setcc,icmov,
+ push,pop,call,callv,
+ str,cld,
+ fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,
+ sselog,sseiadd,sseishft,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv,
+ mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
;; Main data type used by the insn
-(define_attr "mode" "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI"
+(define_attr "mode"
+ "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI,V4SF,V2DF,V2SF"
(const_string "unknown"))
-;; Set for i387 operations.
-(define_attr "i387" ""
- (if_then_else (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp")
- (const_int 1)
- (const_int 0)))
+;; The CPU unit operations uses.
+(define_attr "unit" "integer,i387,sse,mmx,unknown"
+ (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp")
+ (const_string "i387")
+ (eq_attr "type" "sselog,sseiadd,sseishft,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv")
+ (const_string "sse")
+ (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
+ (const_string "mmx")
+ (eq_attr "type" "other")
+ (const_string "unknown")]
+ (const_string "integer")))
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
- (cond [(eq_attr "type" "incdec,setcc,icmov,ibr,str,cld,lea,other,multi,idiv,sse,mmx")
+ (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv")
(const_int 0)
- (eq_attr "i387" "1")
+ (eq_attr "unit" "i387,sse,mmx")
(const_int 0)
- (eq_attr "type" "alu1,negnot,alu,icmp,imovx,ishift,imul,push,pop")
+ (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
+ imul,icmp,push,pop")
(symbol_ref "ix86_attr_length_immediate_default(insn,1)")
(eq_attr "type" "imov,test")
(symbol_ref "ix86_attr_length_immediate_default(insn,0)")
@@ -147,22 +188,20 @@
(if_then_else (match_operand 1 "constant_call_address_operand" "")
(const_int 4)
(const_int 0))
+ ;; We don't know the size before shorten_branches. Expect
+ ;; the instruction to fit for better scheduling.
(eq_attr "type" "ibr")
- (if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
- (lt (minus (match_dup 0) (pc))
- (const_int 124)))
- (const_int 1)
- (const_int 4))
+ (const_int 1)
]
- (symbol_ref "/* Update immediate_length and other attributes! */ abort(),1")))
+ (symbol_ref "/* Update immediate_length and other attributes! */
+ abort(),1")))
;; The (bounding maximum) length of an instruction address.
(define_attr "length_address" ""
(cond [(eq_attr "type" "str,cld,other,multi,fxch")
(const_int 0)
(and (eq_attr "type" "call")
- (match_operand 1 "constant_call_address_operand" ""))
+ (match_operand 0 "constant_call_address_operand" ""))
(const_int 0)
(and (eq_attr "type" "callv")
(match_operand 1 "constant_call_address_operand" ""))
@@ -172,16 +211,25 @@
;; Set when length prefix is used.
(define_attr "prefix_data16" ""
- (if_then_else (eq_attr "mode" "HI")
+ (if_then_else (ior (eq_attr "mode" "HI")
+ (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF")))
(const_int 1)
(const_int 0)))
;; Set when string REP prefix is used.
-(define_attr "prefix_rep" "" (const_int 0))
+(define_attr "prefix_rep" ""
+ (if_then_else (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
+ (const_int 1)
+ (const_int 0)))
;; Set when 0f opcode prefix is used.
(define_attr "prefix_0f" ""
- (if_then_else (eq_attr "type" "imovx,setcc,icmov,sse,mmx")
+ (if_then_else
+ (eq_attr "type"
+ "imovx,setcc,icmov,
+ sselog,sseiadd,sseishft,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv,
+ mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_int 1)
(const_int 0)))
@@ -189,7 +237,7 @@
(define_attr "modrm" ""
(cond [(eq_attr "type" "str,cld")
(const_int 0)
- (eq_attr "i387" "1")
+ (eq_attr "unit" "i387")
(const_int 0)
(and (eq_attr "type" "incdec")
(ior (match_operand:SI 1 "register_operand" "")
@@ -205,6 +253,12 @@
(and (match_operand 0 "register_operand" "")
(match_operand 1 "immediate_operand" "")))
(const_int 0)
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_int 0)
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_int 0)
]
(const_int 1)))
@@ -214,11 +268,15 @@
(define_attr "length" ""
(cond [(eq_attr "type" "other,multi,fistp")
(const_int 16)
- ]
+ (eq_attr "type" "fcmp")
+ (const_int 4)
+ (eq_attr "unit" "i387")
+ (plus (const_int 2)
+ (plus (attr "prefix_data16")
+ (attr "length_address")))]
(plus (plus (attr "modrm")
(plus (attr "prefix_0f")
- (plus (attr "i387")
- (const_int 1))))
+ (const_int 1)))
(plus (attr "prefix_rep")
(plus (attr "prefix_data16")
(plus (attr "length_immediate")
@@ -243,7 +301,7 @@
(if_then_else (match_operand 0 "memory_operand" "")
(const_string "both")
(const_string "load"))
- (eq_attr "type" "icmp,test")
+ (eq_attr "type" "icmp,test,ssecmp,mmxcmp,fcmp")
(if_then_else (ior (match_operand 0 "memory_operand" "")
(match_operand 1 "memory_operand" ""))
(const_string "load")
@@ -270,7 +328,12 @@
(const_string "store")
(match_operand 1 "memory_operand" "")
(const_string "load")
- (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx")
+ (and (eq_attr "type"
+ "!alu1,negnot,
+ imov,imovx,icmp,test,
+ fmov,fcmp,fsgn,
+ sse,ssemov,ssecmp,ssecvt,
+ mmx,mmxmov,mmxcmp,mmxcvt")
(match_operand 2 "memory_operand" ""))
(const_string "load")
(and (eq_attr "type" "icmov")
@@ -284,11 +347,11 @@
(define_attr "imm_disp" "false,true,unknown"
(cond [(eq_attr "type" "other,multi")
(const_string "unknown")
- (and (eq_attr "type" "icmp,test,imov")
+ (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
(and (match_operand 0 "memory_displacement_operand" "")
(match_operand 1 "immediate_operand" "")))
(const_string "true")
- (and (eq_attr "type" "alu,ishift,imul,idiv")
+ (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
(and (match_operand 0 "memory_displacement_operand" "")
(match_operand 2 "immediate_operand" "")))
(const_string "true")
@@ -305,710 +368,10 @@
[(set_attr "length" "128")
(set_attr "type" "multi")])
-;; Pentium Scheduling
-;;
-;; The Pentium is an in-order core with two integer pipelines.
-
-;; True for insns that behave like prefixed insns on the Pentium.
-(define_attr "pent_prefix" "false,true"
- (if_then_else (ior (eq_attr "prefix_0f" "1")
- (ior (eq_attr "prefix_data16" "1")
- (eq_attr "prefix_rep" "1")))
- (const_string "true")
- (const_string "false")))
-
-;; Categorize how an instruction slots.
-
-;; The non-MMX Pentium slots an instruction with prefixes on U pipe only,
-;; while MMX Pentium can slot it on either U or V. Model non-MMX Pentium
-;; rules, because it results in noticeably better code on non-MMX Pentium
-;; and doesn't hurt much on MMX. (Prefixed instructions are not very
-;; common, so the scheduler usualy has a non-prefixed insn to pair).
-
-(define_attr "pent_pair" "uv,pu,pv,np"
- (cond [(eq_attr "imm_disp" "true")
- (const_string "np")
- (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec")
- (and (eq_attr "type" "pop,push")
- (eq_attr "memory" "!both")))
- (if_then_else (eq_attr "pent_prefix" "true")
- (const_string "pu")
- (const_string "uv"))
- (eq_attr "type" "ibr")
- (const_string "pv")
- (and (eq_attr "type" "ishift")
- (match_operand 2 "const_int_operand" ""))
- (const_string "pu")
- (and (eq_attr "type" "call")
- (match_operand 0 "constant_call_address_operand" ""))
- (const_string "pv")
- (and (eq_attr "type" "callv")
- (match_operand 1 "constant_call_address_operand" ""))
- (const_string "pv")
- ]
- (const_string "np")))
-
-;; Rough readiness numbers. Fine tuning happens in i386.c.
-;;
-;; u describes pipe U
-;; v describes pipe V
-;; uv describes either pipe U or V for those that can issue to either
-;; np describes not paring
-;; fpu describes fpu
-;; fpm describes fp insns of different types are not pipelined.
-;;
-;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
-
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "imul"))
- 11 11)
-
-(define_function_unit "pent_mul" 1 1
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "imul"))
- 11 11)
-
-;; Rep movs takes minimally 12 cycles.
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "str"))
- 12 12)
-
-; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "idiv"))
- 46 46)
-
-; Fp reg-reg moves takes 1 cycle. Loads takes 1 cycle for SF/DF mode,
-; 3 cycles for XFmode. Stores takes 2 cycles for SF/DF and 3 for XF.
-; fldz and fld1 takes 2 cycles. Only reg-reg moves are pairable.
-; The integer <-> fp conversion is not modeled correctly. Fild behaves
-; like normal fp operation and fist takes 6 cycles.
-
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "fmov")
- (and (eq_attr "memory" "load,store")
- (eq_attr "mode" "XF"))))
- 3 3)
-
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "fmov")
- (and (eq_attr "memory" "load,store")
- (eq_attr "mode" "XF"))))
- 3 3)
-
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "fmov")
- (ior (match_operand 1 "immediate_operand" "")
- (eq_attr "memory" "store"))))
- 2 2)
-
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "fmov")
- (ior (match_operand 1 "immediate_operand" "")
- (eq_attr "memory" "store"))))
- 2 2)
-
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "cld"))
- 2 2)
-
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "fmov")
- (eq_attr "memory" "none,load")))
- 1 1)
-
-; Read/Modify/Write instructions usually take 3 cycles.
-(define_function_unit "pent_u" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,ishift")
- (and (eq_attr "pent_pair" "pu")
- (eq_attr "memory" "both"))))
- 3 3)
-
-(define_function_unit "pent_uv" 2 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,ishift")
- (and (eq_attr "pent_pair" "!np")
- (eq_attr "memory" "both"))))
- 3 3)
-
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,negnot,ishift")
- (and (eq_attr "pent_pair" "np")
- (eq_attr "memory" "both"))))
- 3 3)
-
-; Read/Modify or Modify/Write instructions usually take 2 cycles.
-(define_function_unit "pent_u" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,ishift")
- (and (eq_attr "pent_pair" "pu")
- (eq_attr "memory" "load,store"))))
- 2 2)
-
-(define_function_unit "pent_uv" 2 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,ishift")
- (and (eq_attr "pent_pair" "!np")
- (eq_attr "memory" "load,store"))))
- 2 2)
-
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,ishift")
- (and (eq_attr "pent_pair" "np")
- (eq_attr "memory" "load,store"))))
- 2 2)
-
-; Insns w/o memory operands and move instructions usually take one cycle.
-(define_function_unit "pent_u" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "pu"))
- 1 1)
-
-(define_function_unit "pent_v" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "pv"))
- 1 1)
-
-(define_function_unit "pent_uv" 2 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "!np"))
- 1 1)
-
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "np"))
- 1 1)
-
-; Pairable insns only conflict with other non-pairable insns.
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,ishift")
- (and (eq_attr "pent_pair" "!np")
- (eq_attr "memory" "both"))))
- 3 3
- [(eq_attr "pent_pair" "np")])
-
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (and (eq_attr "type" "alu,alu1,ishift")
- (and (eq_attr "pent_pair" "!np")
- (eq_attr "memory" "load,store"))))
- 2 2
- [(eq_attr "pent_pair" "np")])
-
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "pent_pair" "!np"))
- 1 1
- [(eq_attr "pent_pair" "np")])
-
-; Floating point instructions usually blocks cycle longer when combined with
-; integer instructions, because of the inpaired fxch instruction.
-(define_function_unit "pent_np" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp"))
- 2 2
- [(eq_attr "type" "!fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")])
-
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fcmp,fxch,fsgn"))
- 1 1)
-
-; Addition takes 3 cycles; assume other random cruft does as well.
-; ??? Trivial fp operations such as fabs or fchs takes only one cycle.
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fop,fop1,fistp"))
- 3 1)
-
-; Multiplication takes 3 cycles and is only half pipelined.
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fmul"))
- 3 1)
-
-(define_function_unit "pent_mul" 1 1
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fmul"))
- 2 2)
-
-; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles.
-; They can overlap with integer insns. Only the last two cycles can overlap
-; with other fp insns. Only fsin/fcos can overlap with multiplies.
-; Only last two cycles of fsin/fcos can overlap with other instructions.
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fdiv"))
- 39 37)
-
-(define_function_unit "pent_mul" 1 1
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fdiv"))
- 39 39)
-
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fpspc"))
- 70 68)
-
-(define_function_unit "pent_mul" 1 1
- (and (eq_attr "cpu" "pentium")
- (eq_attr "type" "fpspc"))
- 70 70)
-
-;; Pentium Pro/PII Scheduling
-;;
-;; The PPro has an out-of-order core, but the instruction decoders are
-;; naturally in-order and asymmetric. We get best performance by scheduling
-;; for the decoders, for in doing so we give the oo execution unit the
-;; most choices.
-
-;; Categorize how many uops an ia32 instruction evaluates to:
-;; one -- an instruction with 1 uop can be decoded by any of the
-;; three decoders.
-;; few -- an instruction with 1 to 4 uops can be decoded only by
-;; decoder 0.
-;; many -- a complex instruction may take an unspecified number of
-;; cycles to decode in decoder 0.
-
-(define_attr "ppro_uops" "one,few,many"
- (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
- (const_string "many")
- (eq_attr "type" "icmov,fcmov,str,cld")
- (const_string "few")
- (eq_attr "type" "imov")
- (if_then_else (eq_attr "memory" "store,both")
- (const_string "few")
- (const_string "one"))
- (eq_attr "memory" "!none")
- (const_string "few")
- ]
- (const_string "one")))
-
-;; Rough readiness numbers. Fine tuning happens in i386.c.
-;;
-;; p0 describes port 0.
-;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either.
-;; p2 describes port 2 for loads.
-;; p34 describes ports 3 and 4 for stores.
-;; fpu describes the fpu accessed via port 0.
-;; ??? It is less than clear if there are separate fadd and fmul units
-;; that could operate in parallel.
-;;
-;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
-
-(define_function_unit "ppro_p0" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "ishift,lea,ibr,cld"))
- 1 1)
-
-(define_function_unit "ppro_p0" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "imul"))
- 4 1)
-
-;; ??? Does the divider lock out the pipe while it works,
-;; or is there a disconnected unit?
-(define_function_unit "ppro_p0" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "idiv"))
- 17 17)
-
-(define_function_unit "ppro_p0" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "fop,fop1,fsgn,fistp"))
- 3 1)
-
-(define_function_unit "ppro_p0" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "fcmov"))
- 2 1)
-
-(define_function_unit "ppro_p0" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "fcmp"))
- 1 1)
-
-(define_function_unit "ppro_p0" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "fmov"))
- 1 1)
-
-(define_function_unit "ppro_p0" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "fmul"))
- 5 1)
-
-(define_function_unit "ppro_p0" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "fdiv,fpspc"))
- 56 1)
-
-(define_function_unit "ppro_p01" 2 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "!imov,fmov"))
- 1 1)
-
-(define_function_unit "ppro_p01" 2 0
- (and (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "imov,fmov"))
- (eq_attr "memory" "none"))
- 1 1)
-
-(define_function_unit "ppro_p2" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (ior (eq_attr "type" "pop")
- (eq_attr "memory" "load,both")))
- 3 1)
-
-(define_function_unit "ppro_p34" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (ior (eq_attr "type" "push")
- (eq_attr "memory" "store,both")))
- 1 1)
-
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "fop,fop1,fsgn,fmov,fcmp,fcmov,fistp"))
- 1 1)
-
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "fmul"))
- 5 2)
-
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "fdiv,fpspc"))
- 56 56)
-
-;; imul uses the fpu. ??? does it have the same throughput as fmul?
-(define_function_unit "fpu" 1 0
- (and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "imul"))
- 4 1)
-
-;; AMD K6/K6-2 Scheduling
-;;
-;; The K6 has similar architecture to PPro. Important difference is, that
-;; there are only two decoders and they seems to be much slower than execution
-;; units. So we have to pay much more attention to proper decoding for
-;; schedulers. We share most of scheduler code for PPro in i386.c
-;;
-;; The fp unit is not pipelined and do one operation per two cycles including
-;; the FXCH.
-;;
-;; alu describes both ALU units (ALU-X and ALU-Y).
-;; alux describes X alu unit
-;; fpu describes FPU unit
-;; load describes load unit.
-;; branch describes branch unit.
-;; store decsribes store unit. This unit is not modelled completely and only
-;; used to model lea operation. Otherwise it lie outside of the critical
-;; path.
-;;
-;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
-
-;; The decoder specification is in the PPro section above!
-
-;; Shift instructions and certain arithmetic are issued only to X pipe.
-(define_function_unit "k6_alux" 1 0
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "ishift,alu1,negnot,cld"))
- 1 1)
-
-;; The QI mode arithmetic is issued to X pipe only.
-(define_function_unit "k6_alux" 1 0
- (and (eq_attr "cpu" "k6")
- (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec")
- (match_operand:QI 0 "general_operand" "")))
- 1 1)
-
-(define_function_unit "k6_alu" 2 0
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "ishift,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea"))
- 1 1)
-
-(define_function_unit "k6_alu" 2 0
- (and (eq_attr "cpu" "k6")
- (and (eq_attr "type" "imov")
- (eq_attr "memory" "none")))
- 1 1)
-
-(define_function_unit "k6_branch" 1 0
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "call,callv,ibr"))
- 1 1)
-
-;; Load unit have two cycle latency, but we take care for it in adjust_cost
-(define_function_unit "k6_load" 1 0
- (and (eq_attr "cpu" "k6")
- (ior (eq_attr "type" "pop")
- (eq_attr "memory" "load,both")))
- 1 1)
-
-(define_function_unit "k6_load" 1 0
- (and (eq_attr "cpu" "k6")
- (and (eq_attr "type" "str")
- (eq_attr "memory" "load,both")))
- 10 10)
-
-;; Lea have two instructions, so latency is probably 2
-(define_function_unit "k6_store" 1 0
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "lea"))
- 2 1)
-
-(define_function_unit "k6_store" 1 0
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "str"))
- 10 10)
-
-(define_function_unit "k6_store" 1 0
- (and (eq_attr "cpu" "k6")
- (ior (eq_attr "type" "push")
- (eq_attr "memory" "store,both")))
- 1 1)
-
-(define_function_unit "k6_fpu" 1 1
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "fop,fop1,fmov,fcmp,fistp"))
- 2 2)
-
-(define_function_unit "k6_fpu" 1 1
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "fmul"))
- 2 2)
-
-;; ??? Guess
-(define_function_unit "k6_fpu" 1 1
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "fdiv,fpspc"))
- 56 56)
-
-(define_function_unit "k6_alu" 2 0
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "imul"))
- 2 2)
-
-(define_function_unit "k6_alux" 1 0
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "imul"))
- 2 2)
-
-;; ??? Guess
-(define_function_unit "k6_alu" 2 0
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "idiv"))
- 17 17)
-
-(define_function_unit "k6_alux" 1 0
- (and (eq_attr "cpu" "k6")
- (eq_attr "type" "idiv"))
- 17 17)
-
-;; AMD Athlon Scheduling
-;;
-;; The Athlon does contain three pipelined FP units, three integer units and
-;; three address generation units.
-;;
-;; The predecode logic is determining boundaries of instructions in the 64
-;; byte cache line. So the cache line straddling problem of K6 might be issue
-;; here as well, but it is not noted in the documentation.
-;;
-;; Three DirectPath instructions decoders and only one VectorPath decoder
-;; is available. They can decode three DirectPath instructions or one VectorPath
-;; instruction per cycle.
-;; Decoded macro instructions are then passed to 72 entry instruction control
-;; unit, that passes
-;; it to the specialized integer (18 entry) and fp (36 entry) schedulers.
-;;
-;; The load/store queue unit is not attached to the schedulers but
-;; communicates with all the execution units separately instead.
-
-(define_attr "athlon_decode" "direct,vector"
- (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov")
- (const_string "vector")
- (and (eq_attr "type" "push")
- (match_operand 1 "memory_operand" ""))
- (const_string "vector")
- (and (eq_attr "type" "fmov")
- (and (eq_attr "memory" "load,store")
- (eq_attr "mode" "XF")))
- (const_string "vector")]
- (const_string "direct")))
-
-(define_function_unit "athlon_vectordec" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_decode" "vector"))
- 1 1)
-
-(define_function_unit "athlon_directdec" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_decode" "direct"))
- 1 1)
-
-(define_function_unit "athlon_vectordec" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_decode" "direct"))
- 1 1 [(eq_attr "athlon_decode" "vector")])
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ibr,call,callv,icmov,cld,pop,setcc,push,pop"))
- 1 1)
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "str"))
- 15 15)
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "imul"))
- 5 0)
-
-(define_function_unit "athlon_ieu" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "idiv"))
- 42 0)
-
-(define_function_unit "athlon_muldiv" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "imul"))
- 5 0)
-
-(define_function_unit "athlon_muldiv" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "idiv"))
- 42 42)
-
-(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
- (cond [(eq_attr "type" "fop,fop1,fcmp,fistp")
- (const_string "add")
- (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
- (const_string "mul")
- (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
- (const_string "store")
- (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
- (const_string "any")
- (and (eq_attr "type" "fmov")
- (ior (match_operand:SI 1 "register_operand" "")
- (match_operand 1 "immediate_operand" "")))
- (const_string "store")
- (eq_attr "type" "fmov")
- (const_string "muladd")]
- (const_string "none")))
-
-;; We use latencies 1 for definitions. This is OK to model colisions
-;; in execution units. The real latencies are modeled in the "fp" pipeline.
-
-;; fsin, fcos: 96-192
-;; fsincos: 107-211
-;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fpspc"))
- 100 1)
-
-;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fdiv"))
- 24 1)
-
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fop,fop1,fmul,fistp"))
- 4 1)
-
-;; XFmode loads are slow.
-;; XFmode store is slow too (8 cycles), but we don't need to model it, because
-;; there are no dependent instructions.
-
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "fmov")
- (and (eq_attr "memory" "load")
- (eq_attr "mode" "XF"))))
- 10 1)
-
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fmov,fsgn"))
- 2 1)
-
-;; fcmp and ftst instructions
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "fcmp")
- (eq_attr "athlon_decode" "direct")))
- 3 1)
-
-;; fcmpi instructions.
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (and (eq_attr "type" "fcmp")
- (eq_attr "athlon_decode" "vector")))
- 3 1)
-
-(define_function_unit "athlon_fp" 3 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "type" "fcmov"))
- 7 1)
-
-(define_function_unit "athlon_fp_mul" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "mul"))
- 1 1)
-
-(define_function_unit "athlon_fp_add" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "add"))
- 1 1)
-
-(define_function_unit "athlon_fp_muladd" 2 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "muladd,mul,add"))
- 1 1)
-
-(define_function_unit "athlon_fp_store" 1 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "athlon_fpunits" "store"))
- 1 1)
-
-;; We don't need to model the Address Generation Unit, since we don't model
-;; the re-order buffer yet and thus we never schedule more than three operations
-;; at time. Later we may want to experiment with MD_SCHED macros modeling the
-;; decoders independently on the functional units.
-
-;(define_function_unit "athlon_agu" 3 0
-; (and (eq_attr "cpu" "athlon")
-; (and (eq_attr "memory" "!none")
-; (eq_attr "athlon_fpunits" "none")))
-; 1 1)
-
-;; Model load unit to avoid too long sequences of loads. We don't need to
-;; model store queue, since it is hardly going to be bottleneck.
-
-(define_function_unit "athlon_load" 2 0
- (and (eq_attr "cpu" "athlon")
- (eq_attr "memory" "load,both"))
- 1 1)
-
+(include "pentium.md")
+(include "ppro.md")
+(include "k6.md")
+(include "athlon.md")
;; Compare instructions.
@@ -1382,7 +745,8 @@
[(set (match_operand:HI 0 "register_operand" "=a")
(unspec:HI
[(compare:CCFP (match_operand 1 "register_operand" "f")
- (match_operand 2 "const0_operand" "X"))] 9))]
+ (match_operand 2 "const0_operand" "X"))]
+ UNSPEC_FNSTSW))]
"TARGET_80387
&& FLOAT_MODE_P (GET_MODE (operands[1]))
&& GET_MODE (operands[1]) == GET_MODE (operands[2])"
@@ -1413,7 +777,8 @@
(unspec:HI
[(compare:CCFP
(match_operand:SF 1 "register_operand" "f")
- (match_operand:SF 2 "nonimmediate_operand" "fm"))] 9))]
+ (match_operand:SF 2 "nonimmediate_operand" "fm"))]
+ UNSPEC_FNSTSW))]
"TARGET_80387"
"* return output_fp_compare (insn, operands, 2, 0);"
[(set_attr "type" "fcmp")
@@ -1434,7 +799,8 @@
(unspec:HI
[(compare:CCFP
(match_operand:DF 1 "register_operand" "f")
- (match_operand:DF 2 "nonimmediate_operand" "fm"))] 9))]
+ (match_operand:DF 2 "nonimmediate_operand" "fm"))]
+ UNSPEC_FNSTSW))]
"TARGET_80387"
"* return output_fp_compare (insn, operands, 2, 0);"
[(set_attr "type" "multi")
@@ -1465,7 +831,8 @@
(unspec:HI
[(compare:CCFP
(match_operand:XF 1 "register_operand" "f")
- (match_operand:XF 2 "register_operand" "f"))] 9))]
+ (match_operand:XF 2 "register_operand" "f"))]
+ UNSPEC_FNSTSW))]
"!TARGET_64BIT && TARGET_80387"
"* return output_fp_compare (insn, operands, 2, 0);"
[(set_attr "type" "multi")
@@ -1476,7 +843,8 @@
(unspec:HI
[(compare:CCFP
(match_operand:TF 1 "register_operand" "f")
- (match_operand:TF 2 "register_operand" "f"))] 9))]
+ (match_operand:TF 2 "register_operand" "f"))]
+ UNSPEC_FNSTSW))]
"TARGET_80387"
"* return output_fp_compare (insn, operands, 2, 0);"
[(set_attr "type" "multi")
@@ -1499,7 +867,8 @@
(unspec:HI
[(compare:CCFPU
(match_operand 1 "register_operand" "f")
- (match_operand 2 "register_operand" "f"))] 9))]
+ (match_operand 2 "register_operand" "f"))]
+ UNSPEC_FNSTSW))]
"TARGET_80387
&& FLOAT_MODE_P (GET_MODE (operands[1]))
&& GET_MODE (operands[1]) == GET_MODE (operands[2])"
@@ -1547,12 +916,12 @@
(define_insn "x86_fnstsw_1"
[(set (match_operand:HI 0 "register_operand" "=a")
- (unspec:HI [(reg 18)] 9))]
+ (unspec:HI [(reg 18)] UNSPEC_FNSTSW))]
"TARGET_80387"
"fnstsw\t%0"
[(set_attr "length" "2")
(set_attr "mode" "SI")
- (set_attr "i387" "1")
+ (set_attr "unit" "i387")
(set_attr "ppro_uops" "few")])
;; FP compares, step 3
@@ -1560,7 +929,7 @@
(define_insn "x86_sahf_1"
[(set (reg:CC 17)
- (unspec:CC [(match_operand:HI 0 "register_operand" "a")] 10))]
+ (unspec:CC [(match_operand:HI 0 "register_operand" "a")] UNSPEC_SAHF))]
"!TARGET_64BIT"
"sahf"
[(set_attr "length" "1")
@@ -1591,7 +960,7 @@
&& SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
- [(set_attr "type" "fcmp,sse")
+ [(set_attr "type" "fcmp,ssecmp")
(set_attr "mode" "unknownfp")
(set_attr "athlon_decode" "vector")])
@@ -1602,7 +971,7 @@
"SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[0])"
"* return output_fp_compare (insn, operands, 1, 0);"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecmp")
(set_attr "mode" "unknownfp")
(set_attr "athlon_decode" "vector")])
@@ -1627,7 +996,7 @@
&& SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
- [(set_attr "type" "fcmp,sse")
+ [(set_attr "type" "fcmp,ssecmp")
(set_attr "mode" "unknownfp")
(set_attr "athlon_decode" "vector")])
@@ -1638,7 +1007,7 @@
"SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecmp")
(set_attr "mode" "unknownfp")
(set_attr "athlon_decode" "vector")])
@@ -1733,25 +1102,20 @@
(set_attr "mode" "SI")
(set_attr "length_immediate" "1")])
-; The first alternative is used only to compute proper length of instruction.
-; Reload's algorithm does not take into account the cost of spill instructions
-; needed to free register in given class, so avoid it from choosing the first
-; alternative when eax is not available.
-
(define_insn "*movsi_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y")
- (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm")
+ (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
{
switch (get_attr_type (insn))
{
- case TYPE_SSE:
- if (get_attr_mode (insn) == TImode)
+ case TYPE_SSEMOV:
+ if (get_attr_mode (insn) == MODE_TI)
return "movdqa\t{%1, %0|%0, %1}";
return "movd\t{%1, %0|%0, %1}";
- case TYPE_MMX:
- if (get_attr_mode (insn) == DImode)
+ case TYPE_MMXMOV:
+ if (get_attr_mode (insn) == MODE_DI)
return "movq\t{%1, %0|%0, %1}";
return "movd\t{%1, %0|%0, %1}";
@@ -1759,46 +1123,44 @@
return "lea{l}\t{%1, %0|%0, %1}";
default:
- if (flag_pic && SYMBOLIC_CONST (operands[1]))
+ if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1]))
abort();
return "mov{l}\t{%1, %0|%0, %1}";
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "4,5,6")
- (const_string "mmx")
- (eq_attr "alternative" "7,8,9")
- (const_string "sse")
+ (cond [(eq_attr "alternative" "2,3,4")
+ (const_string "mmxmov")
+ (eq_attr "alternative" "5,6,7")
+ (const_string "ssemov")
(and (ne (symbol_ref "flag_pic") (const_int 0))
(match_operand:SI 1 "symbolic_operand" ""))
(const_string "lea")
]
(const_string "imov")))
- (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*")
- (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")])
+ (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")])
;; Stores and loads of ax to arbitary constant address.
;; We fake an second form of instruction to force reload to load address
;; into register when rax is not available
(define_insn "*movabssi_1_rex64"
- [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r"))
- (match_operand:SI 1 "nonmemory_operand" "a,er,i"))]
- "TARGET_64BIT"
+ [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:SI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
"@
movabs{l}\t{%1, %P0|%P0, %1}
- mov{l}\t{%1, %a0|%a0, %1}
- movabs{l}\t{%1, %a0|%a0, %1}"
+ mov{l}\t{%1, %a0|%a0, %1}"
[(set_attr "type" "imov")
- (set_attr "modrm" "0,*,*")
- (set_attr "length_address" "8,0,0")
- (set_attr "length_immediate" "0,*,*")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
(set_attr "memory" "store")
(set_attr "mode" "SI")])
(define_insn "*movabssi_2_rex64"
[(set (match_operand:SI 0 "register_operand" "=a,r")
(mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
- "TARGET_64BIT"
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
"@
movabs{l}\t{%P1, %0|%0, %P1}
mov{l}\t{%a1, %0|%0, %a1}"
@@ -1848,14 +1210,9 @@
[(set_attr "type" "push")
(set_attr "mode" "QI")])
-; The first alternative is used only to compute proper length of instruction.
-; Reload's algorithm does not take into account the cost of spill instructions
-; needed to free register in given class, so avoid it from choosing the first
-; alternative when eax is not available.
-
(define_insn "*movhi_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=*?a,r,r,*?a,r,m")
- (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
{
switch (get_attr_type (insn))
@@ -1872,59 +1229,57 @@
}
}
[(set (attr "type")
- (cond [(and (eq_attr "alternative" "0,1")
+ (cond [(and (eq_attr "alternative" "0")
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))
(eq (symbol_ref "TARGET_HIMODE_MATH")
(const_int 0))))
(const_string "imov")
- (and (eq_attr "alternative" "2,3,4")
+ (and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand" ""))
(const_string "imov")
(and (ne (symbol_ref "TARGET_MOVX")
(const_int 0))
- (eq_attr "alternative" "0,1,3,4"))
+ (eq_attr "alternative" "0,2"))
(const_string "imovx")
]
(const_string "imov")))
(set (attr "mode")
(cond [(eq_attr "type" "imovx")
(const_string "SI")
- (and (eq_attr "alternative" "2,3,4")
+ (and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand" ""))
(const_string "SI")
- (and (eq_attr "alternative" "0,1")
+ (and (eq_attr "alternative" "0")
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))
(eq (symbol_ref "TARGET_HIMODE_MATH")
(const_int 0))))
(const_string "SI")
]
- (const_string "HI")))
- (set_attr "modrm" "0,*,*,0,*,*")])
+ (const_string "HI")))])
;; Stores and loads of ax to arbitary constant address.
;; We fake an second form of instruction to force reload to load address
;; into register when rax is not available
(define_insn "*movabshi_1_rex64"
- [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r"))
- (match_operand:HI 1 "nonmemory_operand" "a,er,i"))]
- "TARGET_64BIT"
+ [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:HI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
"@
movabs{w}\t{%1, %P0|%P0, %1}
- mov{w}\t{%1, %a0|%a0, %1}
- movabs{w}\t{%1, %a0|%a0, %1}"
+ mov{w}\t{%1, %a0|%a0, %1}"
[(set_attr "type" "imov")
- (set_attr "modrm" "0,*,*")
- (set_attr "length_address" "8,0,0")
- (set_attr "length_immediate" "0,*,*")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
(set_attr "memory" "store")
(set_attr "mode" "HI")])
(define_insn "*movabshi_2_rex64"
[(set (match_operand:HI 0 "register_operand" "=a,r")
(mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
- "TARGET_64BIT"
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
"@
movabs{w}\t{%P1, %0|%0, %P1}
mov{w}\t{%a1, %0|%0, %a1}"
@@ -2122,7 +1477,7 @@
(define_expand "movstrictqi"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
(match_operand:QI 1 "general_operand" ""))]
- "! TARGET_PARTIAL_REG_STALL"
+ "! TARGET_PARTIAL_REG_STALL || optimize_size"
{
/* Don't generate memory->memory moves, go through a register. */
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
@@ -2132,7 +1487,7 @@
(define_insn "*movstrictqi_1"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
(match_operand:QI 1 "general_operand" "*qn,m"))]
- "! TARGET_PARTIAL_REG_STALL
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"mov{b}\t{%1, %0|%0, %1}"
[(set_attr "type" "imov")
@@ -2226,24 +1581,23 @@
;; We fake an second form of instruction to force reload to load address
;; into register when rax is not available
(define_insn "*movabsqi_1_rex64"
- [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r"))
- (match_operand:QI 1 "nonmemory_operand" "a,er,i"))]
- "TARGET_64BIT"
+ [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:QI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
"@
movabs{b}\t{%1, %P0|%P0, %1}
- mov{b}\t{%1, %a0|%a0, %1}
- movabs{b}\t{%1, %a0|%a0, %1}"
+ mov{b}\t{%1, %a0|%a0, %1}"
[(set_attr "type" "imov")
- (set_attr "modrm" "0,*,*")
- (set_attr "length_address" "8,0,0")
- (set_attr "length_immediate" "0,*,*")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
(set_attr "memory" "store")
(set_attr "mode" "QI")])
(define_insn "*movabsqi_2_rex64"
[(set (match_operand:QI 0 "register_operand" "=a,r")
(mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
- "TARGET_64BIT"
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
"@
movabs{b}\t{%P1, %0|%0, %P1}
mov{b}\t{%a1, %0|%0, %a1}"
@@ -2484,7 +1838,7 @@
movq\t{%1, %0|%0, %1}
movdqa\t{%1, %0|%0, %1}
movq\t{%1, %0|%0, %1}"
- [(set_attr "type" "*,*,mmx,mmx,sse,sse,sse")
+ [(set_attr "type" "*,*,mmx,mmx,ssemov,ssemov,ssemov")
(set_attr "mode" "DI,DI,DI,DI,DI,TI,DI")])
(define_split
@@ -2513,19 +1867,19 @@
{
switch (get_attr_type (insn))
{
- case TYPE_SSE:
+ case TYPE_SSEMOV:
if (register_operand (operands[0], DImode)
&& register_operand (operands[1], DImode))
return "movdqa\t{%1, %0|%0, %1}";
/* FALLTHRU */
- case TYPE_MMX:
+ case TYPE_MMXMOV:
return "movq\t{%1, %0|%0, %1}";
case TYPE_MULTI:
return "#";
case TYPE_LEA:
return "lea{q}\t{%a1, %0|%0, %a1}";
default:
- if (flag_pic && SYMBOLIC_CONST (operands[1]))
+ if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1]))
abort ();
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
@@ -2537,9 +1891,9 @@
}
[(set (attr "type")
(cond [(eq_attr "alternative" "5,6")
- (const_string "mmx")
- (eq_attr "alternative" "7,8")
- (const_string "sse")
+ (const_string "mmxmov")
+ (eq_attr "alternative" "7,8,9")
+ (const_string "ssemov")
(eq_attr "alternative" "4")
(const_string "multi")
(and (ne (symbol_ref "flag_pic") (const_int 0))
@@ -2557,7 +1911,7 @@
(define_insn "*movabsdi_1_rex64"
[(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
(match_operand:DI 1 "nonmemory_operand" "a,er"))]
- "TARGET_64BIT"
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
"@
movabs{q}\t{%1, %P0|%P0, %1}
mov{q}\t{%1, %a0|%a0, %1}"
@@ -2571,7 +1925,7 @@
(define_insn "*movabsdi_2_rex64"
[(set (match_operand:DI 0 "register_operand" "=a,r")
(mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
- "TARGET_64BIT"
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
"@
movabs{q}\t{%P1, %0|%0, %P1}
mov{q}\t{%a1, %0|%0, %a1}"
@@ -2711,21 +2065,21 @@
;; %%% Kill this when call knows how to work this out.
(define_split
[(set (match_operand:SF 0 "push_operand" "")
- (match_operand:SF 1 "register_operand" ""))]
- "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))"
+ (match_operand:SF 1 "any_fp_register_operand" ""))]
+ "!TARGET_64BIT"
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4)))
(set (mem:SF (reg:SI 7)) (match_dup 1))])
(define_split
[(set (match_operand:SF 0 "push_operand" "")
- (match_operand:SF 1 "register_operand" ""))]
- "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))"
+ (match_operand:SF 1 "any_fp_register_operand" ""))]
+ "TARGET_64BIT"
[(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8)))
(set (mem:SF (reg:DI 7)) (match_dup 1))])
(define_insn "*movsf_1"
[(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y")
- (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,H,x,xm#rf,x#rf,rm,*y,*y"))]
+ (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
&& (reload_in_progress || reload_completed
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -2763,7 +2117,7 @@
case 4:
return "mov{l}\t{%1, %0|%0, %1}";
case 5:
- if (TARGET_SSE2)
+ if (TARGET_SSE2 && !TARGET_ATHLON)
return "pxor\t%0, %0";
else
return "xorps\t%0, %0";
@@ -2787,7 +2141,7 @@
abort();
}
}
- [(set_attr "type" "fmov,fmov,fmov,imov,imov,sse,sse,sse,sse,mmx,mmx,mmx")
+ [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
(set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")])
(define_insn "*swapsf"
@@ -2883,16 +2237,16 @@
;; %%% Kill this when call knows how to work this out.
(define_split
[(set (match_operand:DF 0 "push_operand" "")
- (match_operand:DF 1 "register_operand" ""))]
- "!TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))"
+ (match_operand:DF 1 "any_fp_register_operand" ""))]
+ "!TARGET_64BIT && reload_completed"
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
(set (mem:DF (reg:SI 7)) (match_dup 1))]
"")
(define_split
[(set (match_operand:DF 0 "push_operand" "")
- (match_operand:DF 1 "register_operand" ""))]
- "TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))"
+ (match_operand:DF 1 "any_fp_register_operand" ""))]
+ "TARGET_64BIT && reload_completed"
[(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8)))
(set (mem:DF (reg:DI 7)) (match_dup 1))]
"")
@@ -2910,7 +2264,7 @@
(define_insn "*movdf_nointeger"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m")
- (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,H,Y#f,YHm#f,Y#f"))]
+ (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,C,Y#f,YHm#f,Y#f"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
&& (optimize_size || !TARGET_INTEGER_DFMODE_MOVES)
&& (reload_in_progress || reload_completed
@@ -2949,7 +2303,10 @@
case 4:
return "#";
case 5:
- return "pxor\t%0, %0";
+ if (TARGET_ATHLON)
+ return "xorpd\t%0, %0";
+ else
+ return "pxor\t%0, %0";
case 6:
if (TARGET_PARTIAL_REG_DEPENDENCY)
return "movapd\t{%1, %0|%0, %1}";
@@ -2963,12 +2320,12 @@
abort();
}
}
- [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse")
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
(set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
(define_insn "*movdf_integer"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
- (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,H,Y#rf,Ym#rf,Y#rf"))]
+ (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,C,Y#rf,Ym#rf,Y#rf"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
&& !optimize_size && TARGET_INTEGER_DFMODE_MOVES
&& (reload_in_progress || reload_completed
@@ -3008,7 +2365,10 @@
return "#";
case 5:
- return "pxor\t%0, %0";
+ if (TARGET_ATHLON)
+ return "xorpd\t%0, %0";
+ else
+ return "pxor\t%0, %0";
case 6:
if (TARGET_PARTIAL_REG_DEPENDENCY)
return "movapd\t{%1, %0|%0, %1}";
@@ -3022,7 +2382,7 @@
abort();
}
}
- [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse")
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
(set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
(define_split
@@ -3196,28 +2556,28 @@
&& (GET_MODE (operands[0]) == XFmode
|| GET_MODE (operands[0]) == TFmode
|| GET_MODE (operands[0]) == DFmode)
- && (!REG_P (operands[1]) || !ANY_FP_REGNO_P (REGNO (operands[1])))"
+ && !ANY_FP_REG_P (operands[1])"
[(const_int 0)]
"ix86_split_long_move (operands); DONE;")
(define_split
[(set (match_operand:XF 0 "push_operand" "")
- (match_operand:XF 1 "register_operand" ""))]
- "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))"
+ (match_operand:XF 1 "any_fp_register_operand" ""))]
+ "!TARGET_64BIT"
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12)))
(set (mem:XF (reg:SI 7)) (match_dup 1))])
(define_split
[(set (match_operand:TF 0 "push_operand" "")
- (match_operand:TF 1 "register_operand" ""))]
- "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))"
+ (match_operand:TF 1 "any_fp_register_operand" ""))]
+ "!TARGET_64BIT"
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
(set (mem:TF (reg:SI 7)) (match_dup 1))])
(define_split
[(set (match_operand:TF 0 "push_operand" "")
- (match_operand:TF 1 "register_operand" ""))]
- "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))"
+ (match_operand:TF 1 "any_fp_register_operand" ""))]
+ "TARGET_64BIT"
[(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16)))
(set (mem:TF (reg:DI 7)) (match_dup 1))])
@@ -3969,15 +3329,15 @@
(define_split
[(set (match_operand:DF 0 "push_operand" "")
- (float_extend:DF (match_operand:SF 1 "register_operand" "")))]
- "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))"
+ (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
+ "!TARGET_64BIT"
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
(set (mem:DF (reg:SI 7)) (float_extend:DF (match_dup 1)))])
(define_split
[(set (match_operand:DF 0 "push_operand" "")
- (float_extend:DF (match_operand:SF 1 "register_operand" "")))]
- "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))"
+ (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
+ "TARGET_64BIT"
[(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8)))
(set (mem:DF (reg:DI 7)) (float_extend:DF (match_dup 1)))])
@@ -3989,8 +3349,8 @@
(define_split
[(set (match_operand:XF 0 "push_operand" "")
- (float_extend:XF (match_operand:SF 1 "register_operand" "")))]
- "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))"
+ (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))]
+ "!TARGET_64BIT"
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12)))
(set (mem:XF (reg:SI 7)) (float_extend:XF (match_dup 1)))])
@@ -4002,15 +3362,15 @@
(define_split
[(set (match_operand:TF 0 "push_operand" "")
- (float_extend:TF (match_operand:SF 1 "register_operand" "")))]
- "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))"
+ (float_extend:TF (match_operand:SF 1 "fp_register_operand" "")))]
+ "!TARGET_64BIT"
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
(set (mem:TF (reg:SI 7)) (float_extend:TF (match_dup 1)))])
(define_split
[(set (match_operand:TF 0 "push_operand" "")
- (float_extend:TF (match_operand:SF 1 "register_operand" "")))]
- "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))"
+ (float_extend:TF (match_operand:SF 1 "fp_register_operand" "")))]
+ "TARGET_64BIT"
[(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16)))
(set (mem:DF (reg:DI 7)) (float_extend:TF (match_dup 1)))])
@@ -4022,8 +3382,8 @@
(define_split
[(set (match_operand:XF 0 "push_operand" "")
- (float_extend:XF (match_operand:DF 1 "register_operand" "")))]
- "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))"
+ (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))]
+ "!TARGET_64BIT"
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12)))
(set (mem:DF (reg:SI 7)) (float_extend:XF (match_dup 1)))])
@@ -4035,23 +3395,27 @@
(define_split
[(set (match_operand:TF 0 "push_operand" "")
- (float_extend:TF (match_operand:DF 1 "register_operand" "")))]
- "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))"
+ (float_extend:TF (match_operand:DF 1 "fp_register_operand" "")))]
+ "!TARGET_64BIT"
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
(set (mem:TF (reg:SI 7)) (float_extend:XF (match_dup 1)))])
(define_split
[(set (match_operand:TF 0 "push_operand" "")
- (float_extend:TF (match_operand:DF 1 "register_operand" "")))]
- "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))"
+ (float_extend:TF (match_operand:DF 1 "fp_register_operand" "")))]
+ "TARGET_64BIT"
[(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16)))
(set (mem:TF (reg:DI 7)) (float_extend:TF (match_dup 1)))])
(define_expand "extendsfdf2"
[(set (match_operand:DF 0 "nonimmediate_operand" "")
- (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "")))]
+ (float_extend:DF (match_operand:SF 1 "general_operand" "")))]
"TARGET_80387 || TARGET_SSE2"
{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
operands[1] = force_reg (SFmode, operands[1]);
})
@@ -4086,7 +3450,7 @@
abort ();
}
}
- [(set_attr "type" "fmov,fmov,sse")
+ [(set_attr "type" "fmov,fmov,ssecvt")
(set_attr "mode" "SF,XF,DF")])
(define_insn "*extendsfdf2_1_sse_only"
@@ -4095,14 +3459,18 @@
"!TARGET_80387 && TARGET_SSE2
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"cvtss2sd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
(define_expand "extendsfxf2"
[(set (match_operand:XF 0 "nonimmediate_operand" "")
- (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "")))]
+ (float_extend:XF (match_operand:SF 1 "general_operand" "")))]
"!TARGET_64BIT && TARGET_80387"
{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
operands[1] = force_reg (SFmode, operands[1]);
})
@@ -4141,9 +3509,13 @@
(define_expand "extendsftf2"
[(set (match_operand:TF 0 "nonimmediate_operand" "")
- (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "")))]
+ (float_extend:TF (match_operand:SF 1 "general_operand" "")))]
"TARGET_80387"
{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
operands[1] = force_reg (SFmode, operands[1]);
})
@@ -4182,9 +3554,13 @@
(define_expand "extenddfxf2"
[(set (match_operand:XF 0 "nonimmediate_operand" "")
- (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "")))]
+ (float_extend:XF (match_operand:DF 1 "general_operand" "")))]
"!TARGET_64BIT && TARGET_80387"
{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ operands[1] = validize_mem (force_const_mem (DFmode, operands[1]));
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
operands[1] = force_reg (DFmode, operands[1]);
})
@@ -4223,9 +3599,13 @@
(define_expand "extenddftf2"
[(set (match_operand:TF 0 "nonimmediate_operand" "")
- (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "")))]
+ (float_extend:TF (match_operand:DF 1 "general_operand" "")))]
"TARGET_80387"
{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ operands[1] = validize_mem (force_const_mem (DFmode, operands[1]));
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
operands[1] = force_reg (DFmode, operands[1]);
})
@@ -4325,7 +3705,7 @@
abort ();
}
}
- [(set_attr "type" "fmov,multi,multi,multi,sse")
+ [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
(set_attr "mode" "SF,SF,SF,SF,DF")])
(define_insn "*truncdfsf2_2"
@@ -4348,7 +3728,7 @@
abort ();
}
}
- [(set_attr "type" "sse,fmov")
+ [(set_attr "type" "ssecvt,fmov")
(set_attr "mode" "DF,SF")])
(define_insn "truncdfsf2_3"
@@ -4371,7 +3751,7 @@
(match_operand:DF 1 "nonimmediate_operand" "mY")))]
"!TARGET_80387 && TARGET_SSE2"
"cvtsd2ss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
(define_split
@@ -4396,10 +3776,9 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
- (match_operand:DF 1 "register_operand" "")))
+ (match_operand:DF 1 "fp_register_operand" "")))
(clobber (match_operand:SF 2 "memory_operand" ""))]
- "TARGET_80387 && reload_completed
- && FP_REG_P (operands[1])"
+ "TARGET_80387 && reload_completed"
[(set (match_dup 2) (float_truncate:SF (match_dup 1)))
(set (match_dup 0) (match_dup 2))]
"")
@@ -4788,14 +4167,14 @@
(fix:DI (match_operand:SF 1 "nonimmediate_operand" "xm")))]
"TARGET_64BIT && TARGET_SSE"
"cvttss2si{q}\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")])
(define_insn "fix_truncdfdi_sse"
[(set (match_operand:DI 0 "register_operand" "=r")
(fix:DI (match_operand:DF 1 "nonimmediate_operand" "Ym")))]
"TARGET_64BIT && TARGET_SSE2"
"cvttsd2si{q}\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")])
;; Signed conversion to SImode.
@@ -4896,14 +4275,14 @@
(fix:SI (match_operand:SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"cvttss2si\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")])
(define_insn "fix_truncdfsi_sse"
[(set (match_operand:SI 0 "register_operand" "=r")
(fix:SI (match_operand:DF 1 "nonimmediate_operand" "Ym")))]
"TARGET_SSE2"
"cvttsd2si\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")])
(define_split
[(set (match_operand:SI 0 "register_operand" "")
@@ -5034,22 +4413,22 @@
;; %% Not used yet.
(define_insn "x86_fnstcw_1"
[(set (match_operand:HI 0 "memory_operand" "=m")
- (unspec:HI [(reg:HI 18)] 11))]
+ (unspec:HI [(reg:HI 18)] UNSPEC_FSTCW))]
"TARGET_80387"
"fnstcw\t%0"
[(set_attr "length" "2")
(set_attr "mode" "HI")
- (set_attr "i387" "1")
+ (set_attr "unit" "i387")
(set_attr "ppro_uops" "few")])
(define_insn "x86_fldcw_1"
[(set (reg:HI 18)
- (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] 12))]
+ (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
"TARGET_80387"
"fldcw\t%0"
[(set_attr "length" "2")
(set_attr "mode" "HI")
- (set_attr "i387" "1")
+ (set_attr "unit" "i387")
(set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")])
@@ -5083,7 +4462,7 @@
fild%z1\t%1
#
cvtsi2ss\t{%1, %0|%0, %1}"
- [(set_attr "type" "fmov,multi,sse")
+ [(set_attr "type" "fmov,multi,ssecvt")
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
@@ -5092,7 +4471,7 @@
(float:SF (match_operand:SI 1 "nonimmediate_operand" "mr")))]
"TARGET_SSE"
"cvtsi2ss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecvt")
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
@@ -5121,7 +4500,7 @@
fild%z1\t%1
#
cvtsi2ss{q}\t{%1, %0|%0, %1}"
- [(set_attr "type" "fmov,multi,sse")
+ [(set_attr "type" "fmov,multi,ssecvt")
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
@@ -5130,7 +4509,7 @@
(float:SF (match_operand:DI 1 "nonimmediate_operand" "mr")))]
"TARGET_64BIT && TARGET_SSE"
"cvtsi2ss{q}\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecvt")
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
@@ -5159,7 +4538,7 @@
fild%z1\t%1
#
cvtsi2sd\t{%1, %0|%0, %1}"
- [(set_attr "type" "fmov,multi,sse")
+ [(set_attr "type" "fmov,multi,ssecvt")
(set_attr "mode" "DF")
(set_attr "fp_int_src" "true")])
@@ -5168,7 +4547,7 @@
(float:DF (match_operand:SI 1 "nonimmediate_operand" "mr")))]
"TARGET_SSE2"
"cvtsi2sd\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecvt")
(set_attr "mode" "DF")
(set_attr "fp_int_src" "true")])
@@ -5197,7 +4576,7 @@
fild%z1\t%1
#
cvtsi2sd{q}\t{%1, %0|%0, %1}"
- [(set_attr "type" "fmov,multi,sse")
+ [(set_attr "type" "fmov,multi,ssecvt")
(set_attr "mode" "DF")
(set_attr "fp_int_src" "true")])
@@ -5206,7 +4585,7 @@
(float:DF (match_operand:DI 1 "nonimmediate_operand" "mr")))]
"TARGET_SSE2"
"cvtsi2sd{q}\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecvt")
(set_attr "mode" "DF")
(set_attr "fp_int_src" "true")])
@@ -5278,10 +4657,9 @@
;; %%% Kill these when reload knows how to do it.
(define_split
- [(set (match_operand 0 "register_operand" "")
+ [(set (match_operand 0 "fp_register_operand" "")
(float (match_operand 1 "register_operand" "")))]
- "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))
- && FP_REG_P (operands[0])"
+ "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))"
[(const_int 0)]
{
operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
@@ -5320,7 +4698,8 @@
(match_operand:DI 2 "general_operand" "")))
(clobber (reg:CC 17))]
"!TARGET_64BIT && reload_completed"
- [(parallel [(set (reg:CC 17) (unspec:CC [(match_dup 1) (match_dup 2)] 12))
+ [(parallel [(set (reg:CC 17) (unspec:CC [(match_dup 1) (match_dup 2)]
+ UNSPEC_ADD_CARRY))
(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
(parallel [(set (match_dup 3)
(plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0))
@@ -5345,8 +4724,10 @@
(set_attr "ppro_uops" "few")])
(define_insn "*adddi3_cc_rex64"
- [(set (reg:CC 17) (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0")
- (match_operand:DI 2 "x86_64_general_operand" "re,rm")] 12))
+ [(set (reg:CC 17)
+ (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")]
+ UNSPEC_ADD_CARRY))
(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
(plus:DI (match_dup 1) (match_dup 2)))]
"TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
@@ -5382,8 +4763,10 @@
(set_attr "ppro_uops" "few")])
(define_insn "*addsi3_cc"
- [(set (reg:CC 17) (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0")
- (match_operand:SI 2 "general_operand" "ri,rm")] 12))
+ [(set (reg:CC 17)
+ (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")]
+ UNSPEC_ADD_CARRY))
(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
(plus:SI (match_dup 1) (match_dup 2)))]
"ix86_binary_operator_ok (PLUS, SImode, operands)"
@@ -5392,8 +4775,10 @@
(set_attr "mode" "SI")])
(define_insn "addqi3_cc"
- [(set (reg:CC 17) (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
- (match_operand:QI 2 "general_operand" "qi,qm")] 12))
+ [(set (reg:CC 17)
+ (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qi,qm")]
+ UNSPEC_ADD_CARRY))
(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
(plus:QI (match_dup 1) (match_dup 2)))]
"ix86_binary_operator_ok (PLUS, QImode, operands)"
@@ -5446,7 +4831,7 @@
(define_insn_and_split "*lea_general_1"
[(set (match_operand 0 "register_operand" "=r")
- (plus (plus (match_operand 1 "register_operand" "r")
+ (plus (plus (match_operand 1 "index_register_operand" "r")
(match_operand 2 "register_operand" "r"))
(match_operand 3 "immediate_operand" "i")))]
"(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
@@ -5478,7 +4863,7 @@
(define_insn_and_split "*lea_general_1_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
- (plus:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "r")
(match_operand:SI 2 "register_operand" "r"))
(match_operand:SI 3 "immediate_operand" "i"))))]
"TARGET_64BIT"
@@ -5498,7 +4883,7 @@
(define_insn_and_split "*lea_general_2"
[(set (match_operand 0 "register_operand" "=r")
- (plus (mult (match_operand 1 "register_operand" "r")
+ (plus (mult (match_operand 1 "index_register_operand" "r")
(match_operand 2 "const248_operand" "i"))
(match_operand 3 "nonmemory_operand" "ri")))]
"(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
@@ -5528,7 +4913,7 @@
(define_insn_and_split "*lea_general_2_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
- (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "r")
(match_operand:SI 2 "const248_operand" "n"))
(match_operand:SI 3 "nonmemory_operand" "ri"))))]
"TARGET_64BIT"
@@ -5547,7 +4932,7 @@
(define_insn_and_split "*lea_general_3"
[(set (match_operand 0 "register_operand" "=r")
- (plus (plus (mult (match_operand 1 "register_operand" "r")
+ (plus (plus (mult (match_operand 1 "index_register_operand" "r")
(match_operand 2 "const248_operand" "i"))
(match_operand 3 "register_operand" "r"))
(match_operand 4 "immediate_operand" "i")))]
@@ -5581,7 +4966,7 @@
(define_insn_and_split "*lea_general_3_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
- (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "r")
(match_operand:SI 2 "const248_operand" "n"))
(match_operand:SI 3 "register_operand" "r"))
(match_operand:SI 4 "immediate_operand" "i"))))]
@@ -6318,9 +5703,7 @@
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
return "inc{w}\t%0";
- else if (operands[2] == constm1_rtx
- || (GET_CODE (operands[2]) == CONST_INT
- && INTVAL (operands[2]) == 65535))
+ else if (operands[2] == constm1_rtx)
return "dec{w}\t%0";
abort();
@@ -6359,9 +5742,7 @@
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
return "inc{w}\t%0";
- else if (operands[2] == constm1_rtx
- || (GET_CODE (operands[2]) == CONST_INT
- && INTVAL (operands[2]) == 65535))
+ else if (operands[2] == constm1_rtx)
return "dec{w}\t%0";
abort();
@@ -6401,9 +5782,7 @@
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
return "inc{w}\t%0";
- else if (operands[2] == constm1_rtx
- || (GET_CODE (operands[2]) == CONST_INT
- && INTVAL (operands[2]) == 65535))
+ else if (operands[2] == constm1_rtx)
return "dec{w}\t%0";
abort();
@@ -6440,9 +5819,7 @@
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
return "inc{w}\t%0";
- else if (operands[2] == constm1_rtx
- || (GET_CODE (operands[2]) == CONST_INT
- && INTVAL (operands[2]) == 65535))
+ else if (operands[2] == constm1_rtx)
return "dec{w}\t%0";
abort();
@@ -6478,9 +5855,7 @@
switch (get_attr_type (insn))
{
case TYPE_INCDEC:
- if (operands[2] == constm1_rtx
- || (GET_CODE (operands[2]) == CONST_INT
- && INTVAL (operands[2]) == 65535))
+ if (operands[2] == constm1_rtx)
return "inc{w}\t%0";
else if (operands[2] == const1_rtx)
return "dec{w}\t%0";
@@ -6522,9 +5897,7 @@
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
return "inc{w}\t%0";
- else if (operands[2] == constm1_rtx
- || (GET_CODE (operands[2]) == CONST_INT
- && INTVAL (operands[2]) == 65535))
+ else if (operands[2] == constm1_rtx)
return "dec{w}\t%0";
abort();
@@ -6573,9 +5946,7 @@
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
- else if (operands[2] == constm1_rtx
- || (GET_CODE (operands[2]) == CONST_INT
- && INTVAL (operands[2]) == 255))
+ else if (operands[2] == constm1_rtx)
return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
abort();
@@ -6621,9 +5992,7 @@
case TYPE_INCDEC:
if (operands[2] == const1_rtx)
return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
- else if (operands[2] == constm1_rtx
- || (GET_CODE (operands[2]) == CONST_INT
- && INTVAL (operands[2]) == 255))
+ else if (operands[2] == constm1_rtx)
return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
abort();
@@ -6653,6 +6022,40 @@
(const_string "alu")))
(set_attr "mode" "QI,QI,SI")])
+(define_insn "*addqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (plus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qnm")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[1] == const1_rtx)
+ return "inc{b}\t%0";
+ else if (operands[1] == constm1_rtx)
+ return "dec{b}\t%0";
+ abort();
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. */
+ if (GET_CODE (operands[1]) == CONST_INT
+ && INTVAL (operands[1]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{b}\t{%1, %0|%0, %1}";
+ }
+ return "add{b}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu1")))
+ (set_attr "mode" "QI")])
+
(define_insn "*addqi_2"
[(set (reg 17)
(compare
@@ -7184,6 +6587,17 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
+(define_insn "*subqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (minus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qmn")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "sub{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
(define_insn "*subqi_2"
[(set (reg 17)
(compare
@@ -7822,7 +7236,7 @@
(use (match_dup 3))
(clobber (reg:CC 17))])]
{
- /* Avoid use of cltd in favour of a mov+shift. */
+ /* Avoid use of cltd in favor of a mov+shift. */
if (!TARGET_USE_CLTD && !optimize_size)
{
if (true_regnum (operands[1]))
@@ -7907,7 +7321,7 @@
(use (match_dup 3))
(clobber (reg:CC 17))])]
{
- /* Avoid use of cltd in favour of a mov+shift. */
+ /* Avoid use of cltd in favor of a mov+shift. */
if (!TARGET_USE_CLTD && !optimize_size)
{
if (true_regnum (operands[1]))
@@ -8180,8 +7594,7 @@
(const_int 8))
(match_operand 1 "const_int_operand" "n"))
(const_int 0)))]
- "(unsigned HOST_WIDE_INT) INTVAL (operands[1]) <= 0xff
- && ix86_match_ccmode (insn, CCNOmode)"
+ "ix86_match_ccmode (insn, CCNOmode)"
"test{b}\t{%1, %h0|%h0, %1}"
[(set_attr "type" "test")
(set_attr "mode" "QI")
@@ -8319,10 +7732,57 @@
mask = ((HOST_WIDE_INT)1 << (pos + len)) - 1;
mask &= ~(((HOST_WIDE_INT)1 << pos) - 1);
- operands[3] = gen_rtx_AND (mode, operands[0],
- GEN_INT (trunc_int_for_mode (mask, mode)));
+ operands[3] = gen_rtx_AND (mode, operands[0], gen_int_mode (mask, mode));
})
+;; Convert HImode/SImode test instructions with immediate to QImode ones.
+;; i386 does not allow to encode test with 8bit sign extended immediate, so
+;; this is relatively important trick.
+;; Do the converison only post-reload to avoid limiting of the register class
+;; to QI regs.
+(define_split
+ [(set (reg 17)
+ (compare
+ (and (match_operand 0 "register_operand" "")
+ (match_operand 1 "const_int_operand" ""))
+ (const_int 0)))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && ((ix86_match_ccmode (insn, CCZmode)
+ && !(INTVAL (operands[1]) & ~(255 << 8)))
+ || (ix86_match_ccmode (insn, CCNOmode)
+ && !(INTVAL (operands[1]) & ~(127 << 8))))
+ && GET_MODE (operands[0]) != QImode"
+ [(set (reg:CCNO 17)
+ (compare:CCNO
+ (and:SI (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (match_dup 1))
+ (const_int 0)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_int_mode (INTVAL (operands[1]) >> 8, SImode);")
+
+(define_split
+ [(set (reg 17)
+ (compare
+ (and (match_operand 0 "nonimmediate_operand" "")
+ (match_operand 1 "const_int_operand" ""))
+ (const_int 0)))]
+ "reload_completed
+ && (!REG_P (operands[0]) || ANY_QI_REG_P (operands[0]))
+ && ((ix86_match_ccmode (insn, CCZmode)
+ && !(INTVAL (operands[1]) & ~255))
+ || (ix86_match_ccmode (insn, CCNOmode)
+ && !(INTVAL (operands[1]) & ~127)))
+ && GET_MODE (operands[0]) != QImode"
+ [(set (reg:CCNO 17)
+ (compare:CCNO
+ (and:QI (match_dup 0)
+ (match_dup 1))
+ (const_int 0)))]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);")
+
+
;; %%% This used to optimize known byte-wide and operations to memory,
;; and sometimes to QImode registers. If this is considered useful,
;; it should be done with splitters.
@@ -8445,7 +7905,7 @@
(and (match_dup 0)
(const_int -65536)))
(clobber (reg:CC 17))]
- "optimize_size"
+ "optimize_size || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)"
[(set (strict_low_part (match_dup 1)) (const_int 0))]
"operands[1] = gen_lowpart (HImode, operands[0]);")
@@ -8591,7 +8051,8 @@
(and:QI (match_dup 0)
(match_operand:QI 1 "general_operand" "qi,qmi")))
(clobber (reg:CC 17))]
- ""
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"and{b}\t{%1, %0|%0, %1}"
[(set_attr "type" "alu1")
(set_attr "mode" "QI")])
@@ -8627,7 +8088,9 @@
(const_int 0)))
(set (strict_low_part (match_dup 0))
(and:QI (match_dup 0) (match_dup 1)))]
- "ix86_match_ccmode (insn, CCNOmode)"
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"and{b}\t{%1, %0|%0, %1}"
[(set_attr "type" "alu1")
(set_attr "mode" "QI")])
@@ -8647,7 +8110,7 @@
(const_int 8))
(match_operand 2 "const_int_operand" "n")))
(clobber (reg:CC 17))]
- "(unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff"
+ ""
"and{b}\t{%2, %h0|%h0, %2}"
[(set_attr "type" "alu")
(set_attr "length_immediate" "1")
@@ -8675,8 +8138,7 @@
(const_int 8)
(const_int 8))
(match_dup 2)))]
- "ix86_match_ccmode (insn, CCNOmode)
- && (unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff"
+ "ix86_match_ccmode (insn, CCNOmode)"
"and{b}\t{%2, %h0|%h0, %2}"
[(set_attr "type" "alu")
(set_attr "length_immediate" "1")
@@ -8737,6 +8199,51 @@
[(set_attr "type" "alu")
(set_attr "length_immediate" "0")
(set_attr "mode" "QI")])
+
+;; Convert wide AND instructions with immediate operand to shorter QImode
+;; equivalents when possible.
+;; Don't do the splitting with memory operands, since it intoduces risc
+;; of memory mismatch stalls. We may want to do the splitting for optimizing
+;; for size, but that can (should?) be handled by generic code instead.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC 17))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(~INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (and:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC 17))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since AND can be encoded with sign extended immediate, this is only
+;; profitable when 7th bit is not set.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC 17))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(~INTVAL (operands[2]) & ~255)
+ && !(INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (and:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
;; Logical inclusive OR instructions
@@ -8953,7 +8460,8 @@
(ior:QI (match_dup 0)
(match_operand:QI 1 "general_operand" "qmi,qi")))
(clobber (reg:CC 17))]
- ""
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"or{b}\t{%1, %0|%0, %1}"
[(set_attr "type" "alu1")
(set_attr "mode" "QI")])
@@ -8978,7 +8486,9 @@
(const_int 0)))
(set (strict_low_part (match_dup 0))
(ior:QI (match_dup 0) (match_dup 1)))]
- "ix86_match_ccmode (insn, CCNOmode)"
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"or{b}\t{%1, %0|%0, %1}"
[(set_attr "type" "alu1")
(set_attr "mode" "QI")])
@@ -8995,6 +8505,118 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
+(define_insn "iorqi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC 17))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "or{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC 17))]
+ "!TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "or{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC 17))]
+ "TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "or{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC 17))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "ior{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ior (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC 17))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (ior:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC 17))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since OR can be encoded with sign extended immediate, this is only
+;; profitable when 7th bit is set.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ior (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC 17))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(INTVAL (operands[2]) & ~255)
+ && (INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (ior:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
;; Logical XOR instructions
@@ -9210,11 +8832,77 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI,QI,SI")])
+(define_insn "*xorqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (xor:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qi,qmi")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "xor{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "xorqi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC 17))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
(define_insn "*xorqi_ext_1"
[(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
(const_int 8)
(const_int 8))
(xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC 17))]
+ "!TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC 17))]
+ "TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
(zero_extract:SI (match_operand 1 "ext_register_operand" "0")
(const_int 8)
(const_int 8))
@@ -9222,7 +8910,7 @@
(const_int 8)
(const_int 8))))
(clobber (reg:CC 17))]
- ""
+ "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
"xor{b}\t{%h2, %h0|%h0, %h2}"
[(set_attr "type" "alu")
(set_attr "length_immediate" "0")
@@ -9242,6 +8930,20 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
+(define_insn "*xorqi_2_slp"
+ [(set (reg 17)
+ (compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+ (match_operand:QI 1 "general_operand" "qim,qi"))
+ (const_int 0)))
+ (set (strict_low_part (match_dup 0))
+ (xor:QI (match_dup 0) (match_dup 1)))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "xor{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
(define_insn "*xorqi_cc_2"
[(set (reg 17)
(compare
@@ -9316,6 +9018,46 @@
(match_dup 2)))])]
""
"")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (xor (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC 17))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (xor:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC 17))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since XOR can be encoded with sign extended immediate, this is only
+;; profitable when 7th bit is set.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (xor (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC 17))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+ && !(INTVAL (operands[2]) & ~255)
+ && (INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (xor:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC 17))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
;; Negation instructions
@@ -9519,8 +9261,7 @@
operands[0] = force_reg (SFmode, operands[0]);
emit_move_insn (reg,
gen_lowpart (SFmode,
- GEN_INT (trunc_int_for_mode (0x80000000,
- SImode))));
+ gen_int_mode (0x80000000, SImode)));
emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9599,22 +9340,22 @@
"#")
(define_split
- [(set (match_operand:SF 0 "register_operand" "")
+ [(set (match_operand:SF 0 "fp_register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed"
+ "TARGET_80387 && reload_completed"
[(set (match_dup 0)
(neg:SF (match_dup 1)))]
"")
(define_split
- [(set (match_operand:SF 0 "register_operand" "")
+ [(set (match_operand:SF 0 "register_and_not_fp_reg_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))"
+ "TARGET_80387 && reload_completed"
[(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1)))
(clobber (reg:CC 17))])]
- "operands[1] = GEN_INT (trunc_int_for_mode (0x80000000, SImode));
+ "operands[1] = gen_int_mode (0x80000000, SImode);
operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));")
(define_split
@@ -9631,7 +9372,7 @@
if (size >= 12)
size = 10;
operands[0] = adjust_address (operands[0], QImode, size - 1);
- operands[1] = GEN_INT (trunc_int_for_mode (0x80, QImode));
+ operands[1] = gen_int_mode (0x80, QImode);
})
(define_expand "negdf2"
@@ -9651,8 +9392,7 @@
in register. */
rtx reg = gen_reg_rtx (DFmode);
#if HOST_BITS_PER_WIDE_INT >= 64
- rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63,
- DImode));
+ rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
#else
rtx imm = immed_double_const (0, 0x80000000, DImode);
#endif
@@ -9688,9 +9428,9 @@
"#")
(define_insn "*negdf2_ifs_rex64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,fm#Yr,r#Yf")
- (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
- (use (match_operand:DF 2 "general_operand" "Y,0,*g#Yr,*rm"))
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y")
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0")))
+ (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9775,23 +9515,22 @@
"#")
(define_split
- [(set (match_operand:DF 0 "register_operand" "")
+ [(set (match_operand:DF 0 "fp_register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed"
+ "TARGET_80387 && reload_completed"
[(set (match_dup 0)
(neg:DF (match_dup 1)))]
"")
(define_split
- [(set (match_operand:DF 0 "register_operand" "")
+ [(set (match_operand:DF 0 "register_and_not_fp_reg_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "!TARGET_64BIT && TARGET_80387 && reload_completed
- && !FP_REGNO_P (REGNO (operands[0]))"
+ "!TARGET_64BIT && TARGET_80387 && reload_completed"
[(parallel [(set (match_dup 3) (xor:SI (match_dup 3) (match_dup 4)))
(clobber (reg:CC 17))])]
- "operands[4] = GEN_INT (trunc_int_for_mode (0x80000000, SImode));
+ "operands[4] = gen_int_mode (0x80000000, SImode);
split_di (operands+0, 1, operands+2, operands+3);")
(define_expand "negxf2"
@@ -9820,19 +9559,19 @@
"#")
(define_split
- [(set (match_operand:XF 0 "register_operand" "")
+ [(set (match_operand:XF 0 "fp_register_operand" "")
(neg:XF (match_operand:XF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed"
+ "TARGET_80387 && reload_completed"
[(set (match_dup 0)
(neg:XF (match_dup 1)))]
"")
(define_split
- [(set (match_operand:XF 0 "register_operand" "")
+ [(set (match_operand:XF 0 "register_and_not_fp_reg_operand" "")
(neg:XF (match_operand:XF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))"
+ "TARGET_80387 && reload_completed"
[(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1)))
(clobber (reg:CC 17))])]
"operands[1] = GEN_INT (0x8000);
@@ -9850,19 +9589,19 @@
"#")
(define_split
- [(set (match_operand:TF 0 "register_operand" "")
+ [(set (match_operand:TF 0 "fp_register_operand" "")
(neg:TF (match_operand:TF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed"
+ "TARGET_80387 && reload_completed"
[(set (match_dup 0)
(neg:TF (match_dup 1)))]
"")
(define_split
- [(set (match_operand:TF 0 "register_operand" "")
+ [(set (match_operand:TF 0 "register_and_not_fp_reg_operand" "")
(neg:TF (match_operand:TF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))"
+ "TARGET_80387 && reload_completed"
[(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1)))
(clobber (reg:CC 17))])]
"operands[1] = GEN_INT (0x8000);
@@ -9982,8 +9721,7 @@
operands[0] = force_reg (SFmode, operands[0]);
emit_move_insn (reg,
gen_lowpart (SFmode,
- GEN_INT (trunc_int_for_mode (0x80000000,
- SImode))));
+ gen_int_mode (0x80000000, SImode)));
emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -10051,22 +9789,22 @@
"#")
(define_split
- [(set (match_operand:SF 0 "register_operand" "")
+ [(set (match_operand:SF 0 "fp_register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && FP_REGNO_P (REGNO (operands[0]))"
+ "TARGET_80387"
[(set (match_dup 0)
(abs:SF (match_dup 1)))]
"")
(define_split
- [(set (match_operand:SF 0 "register_operand" "")
+ [(set (match_operand:SF 0 "register_and_not_fp_reg_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))"
+ "TARGET_80387 && reload_completed"
[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1)))
(clobber (reg:CC 17))])]
- "operands[1] = GEN_INT (trunc_int_for_mode (~0x80000000, SImode));
+ "operands[1] = gen_int_mode (~0x80000000, SImode);
operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));")
(define_split
@@ -10083,7 +9821,7 @@
if (size >= 12)
size = 10;
operands[0] = adjust_address (operands[0], QImode, size - 1);
- operands[1] = GEN_INT (trunc_int_for_mode (~0x80, QImode));
+ operands[1] = gen_int_mode (~0x80, QImode);
})
(define_expand "absdf2"
@@ -10103,8 +9841,7 @@
in register. */
rtx reg = gen_reg_rtx (DFmode);
#if HOST_BITS_PER_WIDE_INT >= 64
- rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63,
- DImode));
+ rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
#else
rtx imm = immed_double_const (0, 0x80000000, DImode);
#endif
@@ -10205,23 +9942,22 @@
"#")
(define_split
- [(set (match_operand:DF 0 "register_operand" "")
+ [(set (match_operand:DF 0 "fp_register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed"
+ "TARGET_80387 && reload_completed"
[(set (match_dup 0)
(abs:DF (match_dup 1)))]
"")
(define_split
- [(set (match_operand:DF 0 "register_operand" "")
+ [(set (match_operand:DF 0 "register_and_not_fp_reg_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "!TARGET_64BIT && TARGET_80387 && reload_completed &&
- !FP_REGNO_P (REGNO (operands[0]))"
+ "!TARGET_64BIT && TARGET_80387 && reload_completed"
[(parallel [(set (match_dup 3) (and:SI (match_dup 3) (match_dup 4)))
(clobber (reg:CC 17))])]
- "operands[4] = GEN_INT (trunc_int_for_mode (~0x80000000, SImode));
+ "operands[4] = gen_int_mode (~0x80000000, SImode);
split_di (operands+0, 1, operands+2, operands+3);")
(define_expand "absxf2"
@@ -10250,19 +9986,19 @@
"#")
(define_split
- [(set (match_operand:XF 0 "register_operand" "")
+ [(set (match_operand:XF 0 "fp_register_operand" "")
(abs:XF (match_operand:XF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed"
+ "TARGET_80387 && reload_completed"
[(set (match_dup 0)
(abs:XF (match_dup 1)))]
"")
(define_split
- [(set (match_operand:XF 0 "register_operand" "")
+ [(set (match_operand:XF 0 "register_and_not_fp_reg_operand" "")
(abs:XF (match_operand:XF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))"
+ "TARGET_80387 && reload_completed"
[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1)))
(clobber (reg:CC 17))])]
"operands[1] = GEN_INT (~0x8000);
@@ -10277,19 +10013,19 @@
"#")
(define_split
- [(set (match_operand:TF 0 "register_operand" "")
+ [(set (match_operand:TF 0 "fp_register_operand" "")
(abs:TF (match_operand:TF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed"
+ "TARGET_80387 && reload_completed"
[(set (match_dup 0)
(abs:TF (match_dup 1)))]
"")
(define_split
- [(set (match_operand:TF 0 "register_operand" "")
+ [(set (match_operand:TF 0 "register_and_not_any_fp_reg_operand" "")
(abs:TF (match_operand:TF 1 "register_operand" "")))
(clobber (reg:CC 17))]
- "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))"
+ "TARGET_80387 && reload_completed"
[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1)))
(clobber (reg:CC 17))])]
"operands[1] = GEN_INT (~0x8000);
@@ -10643,7 +10379,7 @@
return "sal{q}\t{%b2, %0|%0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{q}\t%0";
else
return "sal{q}\t{%2, %0|%0, %2}";
@@ -10672,8 +10408,7 @@
[(set (match_dup 0)
(mult:DI (match_dup 1)
(match_dup 2)))]
- "operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]),
- DImode));")
+ "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);")
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags. We assume that shifts by constant
@@ -10701,7 +10436,7 @@
return "sal{q}\t{%b2, %0|%0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{q}\t%0";
else
return "sal{q}\t{%2, %0|%0, %2}";
@@ -10849,7 +10584,7 @@
return "sal{l}\t{%b2, %0|%0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{l}\t%0";
else
return "sal{l}\t{%2, %0|%0, %2}";
@@ -10870,7 +10605,7 @@
;; Convert lea to the lea pattern to avoid flags dependency.
(define_split
[(set (match_operand 0 "register_operand" "")
- (ashift (match_operand 1 "register_operand" "")
+ (ashift (match_operand 1 "index_register_operand" "")
(match_operand:QI 2 "const_int_operand" "")))
(clobber (reg:CC 17))]
"reload_completed
@@ -10880,8 +10615,7 @@
rtx pat;
operands[0] = gen_lowpart (SImode, operands[0]);
operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]),
- Pmode));
+ operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
pat = gen_rtx_MULT (Pmode, operands[1], operands[2]);
if (Pmode != SImode)
pat = gen_rtx_SUBREG (SImode, pat, 0);
@@ -10889,6 +10623,26 @@
DONE;
})
+;; Rare case of shifting RSP is handled by generating move and shift
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ashift (match_operand 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))
+ (clobber (reg:CC 17))]
+ "reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(const_int 0)]
+{
+ rtx pat, clob;
+ emit_move_insn (operands[1], operands[0]);
+ pat = gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_ASHIFT (GET_MODE (operands[0]),
+ operands[0], operands[2]));
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob)));
+ DONE;
+})
+
(define_insn "*ashlsi3_1_zext"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,r")
@@ -10911,7 +10665,7 @@
return "sal{l}\t{%b2, %k0|%k0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{l}\t%k0";
else
return "sal{l}\t{%2, %k0|%k0, %2}";
@@ -10939,8 +10693,7 @@
[(set (match_dup 0) (zero_extend:DI (subreg:SI (mult:SI (match_dup 1) (match_dup 2)) 0)))]
{
operands[1] = gen_lowpart (Pmode, operands[1]);
- operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]),
- Pmode));
+ operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
})
;; This pattern can't accept a variable shift count, since shifts by
@@ -10969,7 +10722,7 @@
return "sal{l}\t{%b2, %0|%0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{l}\t%0";
else
return "sal{l}\t{%2, %0|%0, %2}";
@@ -11008,7 +10761,7 @@
return "sal{l}\t{%b2, %k0|%k0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{l}\t%k0";
else
return "sal{l}\t{%2, %k0|%k0, %2}";
@@ -11053,7 +10806,7 @@
return "sal{w}\t{%b2, %0|%0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{w}\t%0";
else
return "sal{w}\t{%2, %0|%0, %2}";
@@ -11091,7 +10844,7 @@
return "sal{w}\t{%b2, %0|%0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{w}\t%0";
else
return "sal{w}\t{%2, %0|%0, %2}";
@@ -11133,7 +10886,7 @@
return "sal{w}\t{%b2, %0|%0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{w}\t%0";
else
return "sal{w}\t{%2, %0|%0, %2}";
@@ -11189,7 +10942,7 @@
}
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
{
if (get_attr_mode (insn) == MODE_SI)
return "sal{l}\t%0";
@@ -11245,7 +10998,7 @@
}
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
{
if (get_attr_mode (insn) == MODE_SI)
return "sal{l}\t%0";
@@ -11297,7 +11050,7 @@
return "sal{b}\t{%b2, %0|%0, %b2}";
else if (GET_CODE (operands[2]) == CONST_INT
&& INTVAL (operands[2]) == 1
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO))
+ && (TARGET_SHIFT1 || optimize_size))
return "sal{b}\t%0";
else
return "sal{b}\t{%2, %0|%0, %2}";
@@ -11353,7 +11106,7 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"sar{q}\t%0"
[(set_attr "type" "ishift")
(set (attr "length")
@@ -11385,7 +11138,7 @@
(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
(ashiftrt:DI (match_dup 1) (match_dup 2)))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
"sar{q}\t%0"
[(set_attr "type" "ishift")
@@ -11542,7 +11295,7 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"sar{l}\t%0"
[(set_attr "type" "ishift")
(set (attr "length")
@@ -11556,7 +11309,7 @@
(match_operand:QI 2 "const_int_1_operand" ""))))
(clobber (reg:CC 17))]
"TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"sar{l}\t%k0"
[(set_attr "type" "ishift")
(set_attr "length" "2")])
@@ -11597,7 +11350,7 @@
(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
(ashiftrt:SI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
"sar{l}\t%0"
[(set_attr "type" "ishift")
@@ -11615,7 +11368,7 @@
(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
"sar{l}\t%k0"
[(set_attr "type" "ishift")
@@ -11666,7 +11419,7 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"sar{w}\t%0"
[(set_attr "type" "ishift")
(set (attr "length")
@@ -11698,7 +11451,7 @@
(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
(ashiftrt:HI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
"sar{w}\t%0"
[(set_attr "type" "ishift")
@@ -11738,7 +11491,7 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"sar{b}\t%0"
[(set_attr "type" "ishift")
(set (attr "length")
@@ -11746,6 +11499,21 @@
(const_string "2")
(const_string "*")))])
+(define_insn "*ashrqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (ashiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_1_operand" "")))
+ (clobber (reg:CC 17))]
+ "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
+ && (! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
(define_insn "*ashrqi3_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
(ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
@@ -11758,6 +11526,19 @@
[(set_attr "type" "ishift")
(set_attr "mode" "QI")])
+(define_insn "*ashrqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (ashiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ sar{b}\t{%1, %0|%0, %1}
+ sar{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "ishift1")
+ (set_attr "mode" "QI")])
+
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags. We assume that shifts by constant
;; zero are optimized away.
@@ -11770,7 +11551,7 @@
(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
(ashiftrt:QI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
"sar{b}\t%0"
[(set_attr "type" "ishift")
@@ -11822,7 +11603,7 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"shr{q}\t%0"
[(set_attr "type" "ishift")
(set (attr "length")
@@ -11854,7 +11635,7 @@
(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
(lshiftrt:DI (match_dup 1) (match_dup 2)))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
"shr{q}\t%0"
[(set_attr "type" "ishift")
@@ -11932,7 +11713,7 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"shr{l}\t%0"
[(set_attr "type" "ishift")
(set (attr "length")
@@ -11946,7 +11727,7 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"shr{l}\t%k0"
[(set_attr "type" "ishift")
(set_attr "length" "2")])
@@ -11988,7 +11769,7 @@
(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
(lshiftrt:SI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
"shr{l}\t%0"
[(set_attr "type" "ishift")
@@ -12006,7 +11787,7 @@
(set (match_operand:DI 0 "register_operand" "=r")
(lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
"TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
"shr{l}\t%k0"
[(set_attr "type" "ishift")
@@ -12057,7 +11838,7 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"shr{w}\t%0"
[(set_attr "type" "ishift")
(set (attr "length")
@@ -12089,7 +11870,7 @@
(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
(lshiftrt:HI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
"shr{w}\t%0"
[(set_attr "type" "ishift")
@@ -12129,7 +11910,7 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"shr{b}\t%0"
[(set_attr "type" "ishift")
(set (attr "length")
@@ -12137,6 +11918,20 @@
(const_string "2")
(const_string "*")))])
+(define_insn "*lshrqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (lshiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_1_operand" "")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
(define_insn "*lshrqi3_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
(lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
@@ -12149,6 +11944,19 @@
[(set_attr "type" "ishift")
(set_attr "mode" "QI")])
+(define_insn "*lshrqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (lshiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ shr{b}\t{%1, %0|%0, %1}
+ shr{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "ishift1")
+ (set_attr "mode" "QI")])
+
;; This pattern can't accept a variable shift count, since shifts by
;; zero don't affect the flags. We assume that shifts by constant
;; zero are optimized away.
@@ -12161,7 +11969,7 @@
(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
(lshiftrt:QI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCGOCmode)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+ && (TARGET_SHIFT1 || optimize_size)
&& ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
"shr{b}\t%0"
[(set_attr "type" "ishift")
@@ -12203,9 +12011,9 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"rol{q}\t%0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set (attr "length")
(if_then_else (match_operand:DI 0 "register_operand" "")
(const_string "2")
@@ -12220,7 +12028,7 @@
"@
rol{q}\t{%2, %0|%0, %2}
rol{q}\t{%b2, %0|%0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "mode" "DI")])
(define_expand "rotlsi3"
@@ -12237,9 +12045,9 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (ROTATE, SImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"rol{l}\t%0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set (attr "length")
(if_then_else (match_operand:SI 0 "register_operand" "")
(const_string "2")
@@ -12252,9 +12060,9 @@
(match_operand:QI 2 "const_int_1_operand" ""))))
(clobber (reg:CC 17))]
"TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"rol{l}\t%k0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "length" "2")])
(define_insn "*rotlsi3_1"
@@ -12266,7 +12074,7 @@
"@
rol{l}\t{%2, %0|%0, %2}
rol{l}\t{%b2, %0|%0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "mode" "SI")])
(define_insn "*rotlsi3_1_zext"
@@ -12279,7 +12087,7 @@
"@
rol{l}\t{%2, %k0|%k0, %2}
rol{l}\t{%b2, %k0|%k0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "mode" "SI")])
(define_expand "rotlhi3"
@@ -12296,9 +12104,9 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (ROTATE, HImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"rol{w}\t%0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set (attr "length")
(if_then_else (match_operand 0 "register_operand" "")
(const_string "2")
@@ -12313,7 +12121,7 @@
"@
rol{w}\t{%2, %0|%0, %2}
rol{w}\t{%b2, %0|%0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "mode" "HI")])
(define_expand "rotlqi3"
@@ -12324,20 +12132,47 @@
"TARGET_QIMODE_MATH"
"ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;")
+(define_insn "*rotlqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (rotate:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_1_operand" "")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "rol{b}\t%0"
+ [(set_attr "type" "rotate1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
(define_insn "*rotlqi3_1_one_bit"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
(rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0")
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (ROTATE, QImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"rol{b}\t%0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set (attr "length")
(if_then_else (match_operand 0 "register_operand" "")
(const_string "2")
(const_string "*")))])
+(define_insn "*rotlqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (rotate:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ rol{b}\t{%1, %0|%0, %1}
+ rol{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "rotate1")
+ (set_attr "mode" "QI")])
+
(define_insn "*rotlqi3_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
(rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
@@ -12347,7 +12182,7 @@
"@
rol{b}\t{%2, %0|%0, %2}
rol{b}\t{%b2, %0|%0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "mode" "QI")])
(define_expand "rotrdi3"
@@ -12364,9 +12199,9 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"ror{q}\t%0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set (attr "length")
(if_then_else (match_operand:DI 0 "register_operand" "")
(const_string "2")
@@ -12381,7 +12216,7 @@
"@
ror{q}\t{%2, %0|%0, %2}
ror{q}\t{%b2, %0|%0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "mode" "DI")])
(define_expand "rotrsi3"
@@ -12398,9 +12233,9 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (ROTATERT, SImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"ror{l}\t%0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set (attr "length")
(if_then_else (match_operand:SI 0 "register_operand" "")
(const_string "2")
@@ -12413,9 +12248,9 @@
(match_operand:QI 2 "const_int_1_operand" ""))))
(clobber (reg:CC 17))]
"TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"ror{l}\t%k0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set (attr "length")
(if_then_else (match_operand:SI 0 "register_operand" "")
(const_string "2")
@@ -12430,7 +12265,7 @@
"@
ror{l}\t{%2, %0|%0, %2}
ror{l}\t{%b2, %0|%0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "mode" "SI")])
(define_insn "*rotrsi3_1_zext"
@@ -12443,7 +12278,7 @@
"@
ror{l}\t{%2, %k0|%k0, %2}
ror{l}\t{%b2, %k0|%k0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "mode" "SI")])
(define_expand "rotrhi3"
@@ -12460,9 +12295,9 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (ROTATERT, HImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"ror{w}\t%0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set (attr "length")
(if_then_else (match_operand 0 "register_operand" "")
(const_string "2")
@@ -12477,7 +12312,7 @@
"@
ror{w}\t{%2, %0|%0, %2}
ror{w}\t{%b2, %0|%0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
(set_attr "mode" "HI")])
(define_expand "rotrqi3"
@@ -12494,9 +12329,23 @@
(match_operand:QI 2 "const_int_1_operand" "")))
(clobber (reg:CC 17))]
"ix86_binary_operator_ok (ROTATERT, QImode, operands)
- && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+ && (TARGET_SHIFT1 || optimize_size)"
"ror{b}\t%0"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (rotatert:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_1_operand" "")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (TARGET_SHIFT1 || optimize_size)"
+ "ror{b}\t%0"
+ [(set_attr "type" "rotate1")
(set (attr "length")
(if_then_else (match_operand 0 "register_operand" "")
(const_string "2")
@@ -12511,7 +12360,20 @@
"@
ror{b}\t{%2, %0|%0, %2}
ror{b}\t{%b2, %0|%0, %b2}"
- [(set_attr "type" "ishift")
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "QI")])
+
+(define_insn "*rotrqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (rotatert:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC 17))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ ror{b}\t{%1, %0|%0, %1}
+ ror{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "rotate1")
(set_attr "mode" "QI")])
;; Bit set / bit test instructions
@@ -12789,7 +12651,7 @@
(match_operand:SF 3 "nonimmediate_operand" "xm")]))]
"TARGET_SSE && reload_completed"
"cmp%D1ss\t{%3, %0|%0, %3}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecmp")
(set_attr "mode" "SF")])
(define_insn "*sse_setccdf"
@@ -12799,7 +12661,7 @@
(match_operand:DF 3 "nonimmediate_operand" "Ym")]))]
"TARGET_SSE2 && reload_completed"
"cmp%D1sd\t{%3, %0|%0, %3}"
- [(set_attr "type" "sse")
+ [(set_attr "type" "ssecmp")
(set_attr "mode" "DF")])
;; Basic conditional jump instructions.
@@ -12961,13 +12823,14 @@
""
"%+j%C1\t%l0"
[(set_attr "type" "ibr")
- (set (attr "prefix_0f")
+ (set_attr "modrm" "0")
+ (set (attr "length")
(if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124)))
- (const_int 0)
- (const_int 1)))])
+ (const_int 128)))
+ (const_int 2)
+ (const_int 6)))])
(define_insn "*jcc_2"
[(set (pc)
@@ -12978,13 +12841,14 @@
""
"%+j%c1\t%l0"
[(set_attr "type" "ibr")
- (set (attr "prefix_0f")
+ (set_attr "modrm" "0")
+ (set (attr "length")
(if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124)))
- (const_int 0)
- (const_int 1)))])
+ (const_int 128)))
+ (const_int 2)
+ (const_int 6)))])
;; In general it is not safe to assume too much about CCmode registers,
;; so simplify-rtx stops when it sees a second one. Under certain
@@ -13244,7 +13108,15 @@
(label_ref (match_operand 0 "" "")))]
""
"jmp\t%l0"
- [(set_attr "type" "ibr")])
+ [(set_attr "type" "ibr")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 5)))
+ (set_attr "modrm" "0")])
(define_expand "indirect_jump"
[(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))]
@@ -13270,29 +13142,34 @@
(use (label_ref (match_operand 1 "" "")))])]
""
{
- /* In PIC mode, the table entries are stored GOT-relative. Convert
- the relative address to an absolute address. */
+ /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit)
+ relative. Convert the relative address to an absolute address. */
if (flag_pic)
{
+ rtx op0, op1;
+ enum rtx_code code;
+
if (TARGET_64BIT)
- operands[0] = expand_simple_binop (Pmode, PLUS, operands[0],
- gen_rtx_LABEL_REF (Pmode, operands[1]),
- NULL_RTX, 0,
- OPTAB_DIRECT);
- else if (HAVE_AS_GOTOFF_IN_DATA)
{
- operands[0] = expand_simple_binop (Pmode, PLUS, operands[0],
- pic_offset_table_rtx, NULL_RTX,
- 1, OPTAB_DIRECT);
- current_function_uses_pic_offset_table = 1;
+ code = PLUS;
+ op0 = operands[0];
+ op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
+ }
+ else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
+ {
+ code = PLUS;
+ op0 = operands[0];
+ op1 = pic_offset_table_rtx;
}
else
{
- operands[0] = expand_simple_binop (Pmode, MINUS, pic_offset_table_rtx,
- operands[0], NULL_RTX, 1,
- OPTAB_DIRECT);
- current_function_uses_pic_offset_table = 1;
+ code = MINUS;
+ op0 = pic_offset_table_rtx;
+ op1 = operands[0];
}
+
+ operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
+ OPTAB_DIRECT);
}
})
@@ -13357,14 +13234,17 @@
return "dec{l}\t%1\;%+jne\t%l0";
}
[(set_attr "ppro_uops" "many")
- (set (attr "type")
+ (set (attr "length")
(if_then_else (and (eq_attr "alternative" "0")
(and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124))))
- (const_string "ibr")
- (const_string "multi")))])
+ (const_int 128))))
+ (const_int 2)
+ (const_int 16)))
+ ;; We don't know the type before shorten branches. Optimistically expect
+ ;; the loop instruction to match.
+ (set (attr "type") (const_string "ibr"))])
(define_split
[(set (pc)
@@ -13474,21 +13354,8 @@
(match_operand:SI 3 "" "")))])]
"!TARGET_64BIT"
{
- if (operands[3] == const0_rtx)
- {
- emit_insn (gen_call (operands[0], operands[1], constm1_rtx));
- DONE;
- }
- /* Static functions and indirect calls don't need
- current_function_uses_pic_offset_table. */
- if (flag_pic
- && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
- && ! SYMBOL_REF_FLAG (XEXP (operands[0], 0)))
- current_function_uses_pic_offset_table = 1;
- if (! call_insn_operand (XEXP (operands[0], 0), Pmode))
- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
- if (TARGET_64BIT)
- abort();
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3]);
+ DONE;
})
(define_insn "*call_pop_0"
@@ -13530,37 +13397,12 @@
[(call (match_operand:QI 0 "" "")
(match_operand 1 "" ""))
(use (match_operand 2 "" ""))]
- ;; Operand 1 not used on the i386.
""
{
- rtx insn;
- /* Static functions and indirect calls don't need
- current_function_uses_pic_offset_table. */
- if (flag_pic
- && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
- && ! SYMBOL_REF_FLAG (XEXP (operands[0], 0)))
- current_function_uses_pic_offset_table = 1;
-
- if (! call_insn_operand (XEXP (operands[0], 0), Pmode))
- XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
- if (TARGET_64BIT && INTVAL (operands[2]) >= 0)
- {
- rtx reg = gen_rtx_REG (QImode, 0);
- emit_move_insn (reg, operands[2]);
- insn = emit_call_insn (gen_call_exp (operands[0], operands[1]));
- use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
- DONE;
- }
- insn = emit_call_insn (gen_call_exp (operands[0], operands[1]));
- DONE;
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL);
+ DONE;
})
-(define_expand "call_exp"
- [(call (match_operand:QI 0 "" "")
- (match_operand 1 "" ""))]
- ""
- "")
-
(define_insn "*call_0"
[(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
(match_operand 1 "" ""))]
@@ -13612,7 +13454,6 @@
[(set_attr "type" "call")])
;; Call subroutine, returning value in operand 0
-;; (which must be a hard register).
(define_expand "call_value_pop"
[(parallel [(set (match_operand 0 "" "")
@@ -13623,20 +13464,9 @@
(match_operand:SI 4 "" "")))])]
"!TARGET_64BIT"
{
- if (operands[4] == const0_rtx)
- {
- emit_insn (gen_call_value (operands[0], operands[1], operands[2],
- constm1_rtx));
- DONE;
- }
- /* Static functions and indirect calls don't need
- current_function_uses_pic_offset_table. */
- if (flag_pic
- && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
- && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0)))
- current_function_uses_pic_offset_table = 1;
- if (! call_insn_operand (XEXP (operands[1], 0), Pmode))
- XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+ ix86_expand_call (operands[0], operands[1], operands[2],
+ operands[3], operands[4]);
+ DONE;
})
(define_expand "call_value"
@@ -13647,36 +13477,10 @@
;; Operand 2 not used on the i386.
""
{
- rtx insn;
- /* Static functions and indirect calls don't need
- current_function_uses_pic_offset_table. */
- if (flag_pic
- && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
- && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0)))
- current_function_uses_pic_offset_table = 1;
- if (! call_insn_operand (XEXP (operands[1], 0), Pmode))
- XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
- if (TARGET_64BIT && INTVAL (operands[3]) >= 0)
- {
- rtx reg = gen_rtx_REG (QImode, 0);
- emit_move_insn (reg, operands[3]);
- insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1],
- operands[2]));
- use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
- DONE;
- }
- insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1],
- operands[2]));
+ ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL);
DONE;
})
-(define_expand "call_value_exp"
- [(set (match_operand 0 "" "")
- (call (match_operand:QI 1 "" "")
- (match_operand:SI 2 "" "")))]
- ""
- "")
-
;; Call subroutine returning any type.
(define_expand "untyped_call"
@@ -13693,12 +13497,10 @@
simply pretend the untyped call returns a complex long double
value. */
- emit_call_insn (TARGET_FLOAT_RETURNS_IN_80387
- ? gen_call_value (gen_rtx_REG (XCmode, FIRST_FLOAT_REG),
- operands[0], const0_rtx,
- GEN_INT (SSE_REGPARM_MAX - 1))
- : gen_call (operands[0], const0_rtx,
- GEN_INT (SSE_REGPARM_MAX - 1)));
+ ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
+ ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
+ operands[0], const0_rtx, GEN_INT (SSE_REGPARM_MAX - 1),
+ NULL);
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
@@ -13710,7 +13512,7 @@
registers we stored in the result block. We avoid problems by
claiming that all hard registers are used and clobbered at this
point. */
- emit_insn (gen_blockage ());
+ emit_insn (gen_blockage (const0_rtx));
DONE;
})
@@ -13721,7 +13523,7 @@
;; all of memory. This blocks insns from being moved across this point.
(define_insn "blockage"
- [(unspec_volatile [(const_int 0)] 0)]
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_BLOCKAGE)]
""
""
[(set_attr "length" "0")])
@@ -13781,45 +13583,14 @@
""
"ix86_expand_prologue (); DONE;")
-(define_insn "prologue_set_got"
+(define_insn "set_got"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec_volatile:SI
- [(plus:SI (match_dup 0)
- (plus:SI (match_operand:SI 1 "symbolic_operand" "")
- (minus:SI (pc) (match_operand 2 "" ""))))] 1))
+ (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
(clobber (reg:CC 17))]
"!TARGET_64BIT"
-{
- if (GET_CODE (operands[2]) == LABEL_REF)
- operands[2] = XEXP (operands[2], 0);
- if (TARGET_DEEP_BRANCH_PREDICTION)
- return "add{l}\t{%1, %0|%0, %1}";
- else
- return "add{l}\t{%1+[.-%X2], %0|%0, %a1+(.-%X2)}";
-}
- [(set_attr "type" "alu")
- ; Since this insn may have two constant operands, we must set the
- ; length manually.
- (set_attr "length_immediate" "4")
- (set_attr "mode" "SI")])
-
-(define_insn "prologue_get_pc"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec_volatile:SI [(plus:SI (pc) (match_operand 1 "" ""))] 2))]
- "!TARGET_64BIT"
-{
- if (GET_CODE (operands[1]) == LABEL_REF)
- operands[1] = XEXP (operands[1], 0);
- output_asm_insn ("call\t%X1", operands);
- if (! TARGET_DEEP_BRANCH_PREDICTION)
- {
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
- CODE_LABEL_NUMBER (operands[1]));
- return "pop{l}\t%0";
- }
- RET;
-}
- [(set_attr "type" "multi")])
+ { return output_set_got (operands[0]); }
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
(define_expand "epilogue"
[(const_int 1)]
@@ -13832,11 +13603,10 @@
"ix86_expand_epilogue (0); DONE;")
(define_expand "eh_return"
- [(use (match_operand 0 "register_operand" ""))
- (use (match_operand 1 "register_operand" ""))]
+ [(use (match_operand 0 "register_operand" ""))]
""
{
- rtx tmp, sa = operands[0], ra = operands[1];
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
/* Tricky bit: we write the address of the handler to which we will
be returning into someone else's stack frame, one word below the
@@ -13855,7 +13625,8 @@
})
(define_insn_and_split "eh_return_si"
- [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")] 13)]
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")]
+ UNSPECV_EH_RETURN)]
"!TARGET_64BIT"
"#"
"reload_completed"
@@ -13863,7 +13634,8 @@
"ix86_expand_epilogue (2); DONE;")
(define_insn_and_split "eh_return_di"
- [(unspec_volatile [(match_operand:DI 0 "register_operand" "c")] 13)]
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "c")]
+ UNSPECV_EH_RETURN)]
"TARGET_64BIT"
"#"
"reload_completed"
@@ -13879,7 +13651,6 @@
[(set_attr "length_immediate" "0")
(set_attr "length" "1")
(set_attr "modrm" "0")
- (set_attr "modrm" "0")
(set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")])
@@ -13892,13 +13663,12 @@
[(set_attr "length_immediate" "0")
(set_attr "length" "1")
(set_attr "modrm" "0")
- (set_attr "modrm" "0")
(set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")])
(define_expand "ffssi2"
[(set (match_operand:SI 0 "nonimmediate_operand" "")
- (ffs:SI (match_operand:SI 1 "general_operand" "")))]
+ (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
""
{
rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode);
@@ -13987,7 +13757,7 @@
(compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
(const_int 0)))
(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_dup 1)] 5))]
+ (unspec:SI [(match_dup 1)] UNSPEC_BSF))]
""
"bsf{l}\t{%1, %0|%0, %1}"
[(set_attr "prefix_0f" "1")
@@ -13996,6 +13766,173 @@
;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger
;; and slower than the two-byte movzx insn needed to do the work in SImode.
+;; Thread-local storage patterns for ELF.
+;;
+;; Note that these code sequences must appear exactly as shown
+;; in order to allow linker relaxation.
+
+(define_insn "*tls_global_dynamic_32_gnu"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "tls_symbolic_operand" "")
+ (match_operand:SI 3 "call_insn_operand" "")]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC 17))]
+ "!TARGET_64BIT && TARGET_GNU_TLS"
+ "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "*tls_global_dynamic_32_sun"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "tls_symbolic_operand" "")
+ (match_operand:SI 3 "call_insn_operand" "")]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC 17))]
+ "!TARGET_64BIT && TARGET_SUN_TLS"
+ "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]}
+ push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop"
+ [(set_attr "type" "multi")
+ (set_attr "length" "14")])
+
+(define_expand "tls_global_dynamic_32"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI
+ [(match_dup 2)
+ (match_operand:SI 1 "tls_symbolic_operand" "")
+ (match_dup 3)]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (match_scratch:SI 5 ""))
+ (clobber (reg:CC 17))])]
+ ""
+{
+ if (flag_pic)
+ operands[2] = pic_offset_table_rtx;
+ else
+ {
+ operands[2] = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (operands[2]));
+ }
+ operands[3] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_global_dynamic_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call (mem:QI (match_operand:DI 2 "call_insn_operand" ""))
+ (match_operand:DI 3 "" "")))
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLS_GD)]
+ "TARGET_64BIT"
+ ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|%%rdi, %a1@TLSGD[%%rip]}\;.word\t0x6666\;rex64\;call\t%P2"
+ [(set_attr "type" "multi")
+ (set_attr "length" "16")])
+
+(define_expand "tls_global_dynamic_64"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (call (mem:QI (match_dup 2)) (const_int 0)))
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLS_GD)])]
+ ""
+{
+ operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_32_gnu"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 "=d"))
+ (clobber (match_scratch:SI 4 "=c"))
+ (clobber (reg:CC 17))]
+ "!TARGET_64BIT && TARGET_GNU_TLS"
+ "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2"
+ [(set_attr "type" "multi")
+ (set_attr "length" "11")])
+
+(define_insn "*tls_local_dynamic_base_32_sun"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 "=d"))
+ (clobber (match_scratch:SI 4 "=c"))
+ (clobber (reg:CC 17))]
+ "!TARGET_64BIT && TARGET_SUN_TLS"
+ "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]}
+ push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3"
+ [(set_attr "type" "multi")
+ (set_attr "length" "13")])
+
+(define_expand "tls_local_dynamic_base_32"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_dup 1) (match_dup 2)]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (reg:CC 17))])]
+ ""
+{
+ if (flag_pic)
+ operands[1] = pic_offset_table_rtx;
+ else
+ {
+ operands[1] = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (operands[1]));
+ }
+ operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" ""))
+ (match_operand:DI 2 "" "")))
+ (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
+ "TARGET_64BIT"
+ "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_expand "tls_local_dynamic_base_64"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (call (mem:QI (match_dup 1)) (const_int 0)))
+ (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
+ ""
+{
+ operands[1] = ix86_tls_get_addr ();
+})
+
+;; Local dynamic of a single variable is a lose. Show combine how
+;; to convert that back to global dynamic.
+
+(define_insn_and_split "*tls_local_dynamic_32_once"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE)
+ (const:SI (unspec:SI
+ [(match_operand:SI 3 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC 17))]
+ ""
+ "#"
+ ""
+ [(parallel [(set (match_dup 0)
+ (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
+ UNSPEC_TLS_GD))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (clobber (reg:CC 17))])]
+ "")
+
;; These patterns match the binary 387 instructions for addM3, subM3,
;; mulM3 and divM3. There are three patterns for each of DFmode and
;; SFmode. The first is the normal insn, the second the same insn but
@@ -14032,7 +13969,9 @@
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
(if_then_else (eq_attr "alternative" "1")
- (const_string "sse")
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd"))
(if_then_else (match_operand:SF 3 "mult_operator" "")
(const_string "fmul")
(const_string "fop"))))
@@ -14046,7 +13985,10 @@
"TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
- [(set_attr "type" "sse")
+ [(set (attr "type")
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
(set_attr "mode" "SF")])
(define_insn "*fop_df_comm_nosse"
@@ -14075,7 +14017,9 @@
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
(if_then_else (eq_attr "alternative" "1")
- (const_string "sse")
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd"))
(if_then_else (match_operand:SF 3 "mult_operator" "")
(const_string "fmul")
(const_string "fop"))))
@@ -14090,7 +14034,10 @@
&& GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
- [(set_attr "type" "sse")
+ [(set (attr "type")
+ (if_then_else (match_operand:SF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
(set_attr "mode" "DF")])
(define_insn "*fop_xf_comm"
@@ -14148,8 +14095,14 @@
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
- (cond [(eq_attr "alternative" "2")
- (const_string "sse")
+ (cond [(and (eq_attr "alternative" "2")
+ (match_operand:SF 3 "mult_operator" ""))
+ (const_string "ssemul")
+ (and (eq_attr "alternative" "2")
+ (match_operand:SF 3 "div_operator" ""))
+ (const_string "ssediv")
+ (eq_attr "alternative" "2")
+ (const_string "sseadd")
(match_operand:SF 3 "mult_operator" "")
(const_string "fmul")
(match_operand:SF 3 "div_operator" "")
@@ -14166,7 +14119,13 @@
"TARGET_SSE_MATH
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'"
"* return output_387_binary_op (insn, operands);"
- [(set_attr "type" "sse")
+ [(set (attr "type")
+ (cond [(match_operand:SF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (match_operand:SF 3 "div_operator" "")
+ (const_string "ssediv")
+ ]
+ (const_string "sseadd")))
(set_attr "mode" "SF")])
;; ??? Add SSE splitters for these!
@@ -14218,7 +14177,7 @@
[(set (attr "type")
(cond [(match_operand:DF 3 "mult_operator" "")
(const_string "fmul")
- (match_operand:DF 3 "div_operator" "")
+ (match_operand:DF 3 "div_operator" "")
(const_string "fdiv")
]
(const_string "fop")))
@@ -14235,8 +14194,14 @@
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"* return output_387_binary_op (insn, operands);"
[(set (attr "type")
- (cond [(eq_attr "alternative" "2")
- (const_string "sse")
+ (cond [(and (eq_attr "alternative" "2")
+ (match_operand:SF 3 "mult_operator" ""))
+ (const_string "ssemul")
+ (and (eq_attr "alternative" "2")
+ (match_operand:SF 3 "div_operator" ""))
+ (const_string "ssediv")
+ (eq_attr "alternative" "2")
+ (const_string "sseadd")
(match_operand:DF 3 "mult_operator" "")
(const_string "fmul")
(match_operand:DF 3 "div_operator" "")
@@ -14253,7 +14218,14 @@
"TARGET_SSE2 && TARGET_SSE_MATH
&& GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'"
"* return output_387_binary_op (insn, operands);"
- [(set_attr "type" "sse")])
+ [(set_attr "mode" "DF")
+ (set (attr "type")
+ (cond [(match_operand:SF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (match_operand:SF 3 "div_operator" "")
+ (const_string "ssediv")
+ ]
+ (const_string "sseadd")))])
;; ??? Add SSE splitters for these!
(define_insn "*fop_df_2"
@@ -14760,7 +14732,7 @@
(define_insn "sindf2"
[(set (match_operand:DF 0 "register_operand" "=f")
- (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 1))]
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_SIN))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fsin"
@@ -14769,7 +14741,7 @@
(define_insn "sinsf2"
[(set (match_operand:SF 0 "register_operand" "=f")
- (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 1))]
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_SIN))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fsin"
@@ -14779,7 +14751,8 @@
(define_insn "*sinextendsfdf2"
[(set (match_operand:DF 0 "register_operand" "=f")
(unspec:DF [(float_extend:DF
- (match_operand:SF 1 "register_operand" "0"))] 1))]
+ (match_operand:SF 1 "register_operand" "0"))]
+ UNSPEC_SIN))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fsin"
@@ -14788,7 +14761,7 @@
(define_insn "sinxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
- (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 1))]
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))]
"!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387
&& flag_unsafe_math_optimizations"
"fsin"
@@ -14797,7 +14770,7 @@
(define_insn "sintf2"
[(set (match_operand:TF 0 "register_operand" "=f")
- (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 1))]
+ (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_SIN))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fsin"
@@ -14806,7 +14779,7 @@
(define_insn "cosdf2"
[(set (match_operand:DF 0 "register_operand" "=f")
- (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 2))]
+ (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_COS))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fcos"
@@ -14815,7 +14788,7 @@
(define_insn "cossf2"
[(set (match_operand:SF 0 "register_operand" "=f")
- (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 2))]
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_COS))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fcos"
@@ -14825,7 +14798,8 @@
(define_insn "*cosextendsfdf2"
[(set (match_operand:DF 0 "register_operand" "=f")
(unspec:DF [(float_extend:DF
- (match_operand:SF 1 "register_operand" "0"))] 2))]
+ (match_operand:SF 1 "register_operand" "0"))]
+ UNSPEC_COS))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fcos"
@@ -14834,8 +14808,8 @@
(define_insn "cosxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
- (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 2))]
- "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
+ "!TARGET_64BIT && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fcos"
[(set_attr "type" "fpspc")
@@ -14843,7 +14817,7 @@
(define_insn "costf2"
[(set (match_operand:TF 0 "register_operand" "=f")
- (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 2))]
+ (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_COS))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fcos"
@@ -15737,7 +15711,7 @@
[(set (match_operand:SI 0 "register_operand" "")
(unspec:SI [(match_operand:BLK 1 "general_operand" "")
(match_operand:QI 2 "immediate_operand" "")
- (match_operand 3 "immediate_operand" "")] 0))]
+ (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
""
{
if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
@@ -15750,7 +15724,7 @@
[(set (match_operand:DI 0 "register_operand" "")
(unspec:DI [(match_operand:BLK 1 "general_operand" "")
(match_operand:QI 2 "immediate_operand" "")
- (match_operand 3 "immediate_operand" "")] 0))]
+ (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
""
{
if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
@@ -15764,7 +15738,7 @@
(unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1"))
(match_operand:QI 2 "register_operand" "a")
(match_operand:SI 3 "immediate_operand" "i")
- (match_operand:SI 4 "register_operand" "0")] 0))
+ (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS))
(use (reg:SI 19))
(clobber (match_operand:SI 1 "register_operand" "=D"))
(clobber (reg:CC 17))]
@@ -15779,7 +15753,7 @@
(unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1"))
(match_operand:QI 2 "register_operand" "a")
(match_operand:DI 3 "immediate_operand" "i")
- (match_operand:DI 4 "register_operand" "0")] 0))
+ (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS))
(use (reg:SI 19))
(clobber (match_operand:DI 1 "register_operand" "=D"))
(clobber (reg:CC 17))]
@@ -15895,6 +15869,7 @@
; Since we don't have the proper number of operands for an alu insn,
; fill in all the blanks.
[(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
(set_attr "mode" "DI")
@@ -15937,6 +15912,7 @@
; Since we don't have the proper number of operands for an alu insn,
; fill in all the blanks.
[(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
(set_attr "memory" "none")
(set_attr "imm_disp" "false")
(set_attr "mode" "SI")
@@ -16043,12 +16019,12 @@
(set_attr "mode" "DF")])
(define_split
- [(set (match_operand:DF 0 "register_operand" "")
+ [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "")
(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
[(match_operand 4 "" "") (const_int 0)])
(match_operand:DF 2 "nonimmediate_operand" "")
(match_operand:DF 3 "nonimmediate_operand" "")))]
- "!TARGET_64BIT && !ANY_FP_REG_P (operands[0]) && reload_completed"
+ "!TARGET_64BIT && reload_completed"
[(set (match_dup 2)
(if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
(match_dup 5)
@@ -16156,13 +16132,13 @@
;; We can't represent the LT test directly. Do this by swapping the operands.
(define_split
- [(set (match_operand:SF 0 "register_operand" "")
+ [(set (match_operand:SF 0 "fp_register_operand" "")
(if_then_else:SF (lt (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "register_operand" ""))
(match_operand:SF 3 "register_operand" "")
(match_operand:SF 4 "register_operand" "")))
(clobber (reg:CC 17))]
- "FP_REG_P (operands[0]) && reload_completed
+ "reload_completed
&& ((operands_match_p (operands[1], operands[3])
&& operands_match_p (operands[2], operands[4]))
|| (operands_match_p (operands[1], operands[4])
@@ -16238,13 +16214,13 @@
;; We can't represent the LT test directly. Do this by swapping the operands.
(define_split
- [(set (match_operand:DF 0 "register_operand" "")
+ [(set (match_operand:DF 0 "fp_register_operand" "")
(if_then_else:DF (lt (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "register_operand" ""))
(match_operand:DF 3 "register_operand" "")
(match_operand:DF 4 "register_operand" "")))
(clobber (reg:CC 17))]
- "FP_REG_P (operands[0]) && reload_completed
+ "reload_completed
&& ((operands_match_p (operands[1], operands[3])
&& operands_match_p (operands[2], operands[4]))
|| (operands_match_p (operands[1], operands[4])
@@ -16319,13 +16295,13 @@
(match_dup 2)))])
(define_split
- [(set (match_operand:SF 0 "register_operand" "")
+ [(set (match_operand:SF 0 "fp_register_operand" "")
(if_then_else:SF (gt (match_operand:SF 1 "register_operand" "")
(match_operand:SF 2 "register_operand" ""))
(match_operand:SF 3 "register_operand" "")
(match_operand:SF 4 "register_operand" "")))
(clobber (reg:CC 17))]
- "FP_REG_P (operands[0]) && reload_completed
+ "reload_completed
&& ((operands_match_p (operands[1], operands[3])
&& operands_match_p (operands[2], operands[4]))
|| (operands_match_p (operands[1], operands[4])
@@ -16400,13 +16376,13 @@
(match_dup 2)))])
(define_split
- [(set (match_operand:DF 0 "register_operand" "")
+ [(set (match_operand:DF 0 "fp_register_operand" "")
(if_then_else:DF (gt (match_operand:DF 1 "register_operand" "")
(match_operand:DF 2 "register_operand" ""))
(match_operand:DF 3 "register_operand" "")
(match_operand:DF 4 "register_operand" "")))
(clobber (reg:CC 17))]
- "FP_REG_P (operands[0]) && reload_completed
+ "reload_completed
&& ((operands_match_p (operands[1], operands[3])
&& operands_match_p (operands[2], operands[4]))
|| (operands_match_p (operands[1], operands[4])
@@ -16628,7 +16604,7 @@
;; cmpCC op0, op4 - set op0 to 0 or ffffffff depending on the comparison
;; and op2, op0 - zero op2 if comparison was false
;; nand op0, op3 - load op3 to op0 if comparison was false
-;; or op2, op0 - get the non-zero one into the result.
+;; or op2, op0 - get the nonzero one into the result.
(define_split
[(set (match_operand 0 "register_operand" "")
(if_then_else (match_operator 1 "sse_comparison_operator"
@@ -16744,7 +16720,7 @@
(define_split
[(set (match_operand 0 "register_operand" "")
(if_then_else (match_operator 1 "comparison_operator"
- [(match_operand 4 "register_operand" "")
+ [(match_operand 4 "nonimmediate_operand" "")
(match_operand 5 "nonimmediate_operand" "")])
(match_operand 2 "nonmemory_operand" "")
(match_operand 3 "nonmemory_operand" "")))]
@@ -16756,13 +16732,16 @@
(subreg:TI (match_dup 7) 0)))]
{
PUT_MODE (operands[1], GET_MODE (operands[0]));
- if (!sse_comparison_operator (operands[1], VOIDmode))
+ if (!sse_comparison_operator (operands[1], VOIDmode)
+ || !rtx_equal_p (operands[0], operands[4]))
{
rtx tmp = operands[5];
operands[5] = operands[4];
operands[4] = tmp;
PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1])));
}
+ if (!rtx_equal_p (operands[0], operands[4]))
+ abort ();
if (const0_operand (operands[2], GET_MODE (operands[0])))
{
operands[7] = operands[3];
@@ -16788,7 +16767,7 @@
})
(define_insn "allocate_stack_worker_1"
- [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3)
+ [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] UNSPEC_STACK_PROBE)
(set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 0)))
(clobber (match_dup 0))
(clobber (reg:CC 17))]
@@ -16798,7 +16777,7 @@
(set_attr "length" "5")])
(define_insn "allocate_stack_worker_rex64"
- [(unspec:DI [(match_operand:DI 0 "register_operand" "a")] 3)
+ [(unspec:DI [(match_operand:DI 0 "register_operand" "a")] UNSPEC_STACK_PROBE)
(set (reg:DI 7) (minus:DI (reg:DI 7) (match_dup 0)))
(clobber (match_dup 0))
(clobber (reg:CC 17))]
@@ -16835,7 +16814,7 @@
[(label_ref (match_operand 0 "" ""))]
"!TARGET_64BIT && flag_pic"
{
- load_pic_register ();
+ emit_insn (gen_set_got (pic_offset_table_rtx));
DONE;
})
@@ -16849,7 +16828,8 @@
(clobber (reg:CC 17))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
&& ((GET_MODE (operands[0]) == HImode
- && (!optimize_size || GET_CODE (operands[2]) != CONST_INT
+ && ((!optimize_size && !TARGET_FAST_PREFIX)
+ || GET_CODE (operands[2]) != CONST_INT
|| CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')))
|| (GET_MODE (operands[0]) == QImode
&& (TARGET_PROMOTE_QImode || optimize_size)))"
@@ -16862,6 +16842,10 @@
operands[2] = gen_lowpart (SImode, operands[2]);
PUT_MODE (operands[3], SImode);")
+; Promote the QImode tests, as i386 has encoding of the AND
+; instruction with 32-bit sign-extended immediate and thus the
+; instruction size is unchanged, except in the %eax case for
+; which it is increased by one byte, hence the ! optimize_size.
(define_split
[(set (reg 17)
(compare (and (match_operand 1 "aligned_operand" "")
@@ -16870,39 +16854,44 @@
(set (match_operand 0 "register_operand" "")
(and (match_dup 1) (match_dup 2)))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
- && ix86_match_ccmode (insn, CCNOmode)
- && (GET_MODE (operands[0]) == HImode
- || (GET_MODE (operands[0]) == QImode
- && (TARGET_PROMOTE_QImode || optimize_size)))"
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)
+ && ! optimize_size
+ && ((GET_MODE (operands[0]) == HImode && ! TARGET_FAST_PREFIX)
+ || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
[(parallel [(set (reg:CCNO 17)
(compare:CCNO (and:SI (match_dup 1) (match_dup 2))
(const_int 0)))
(set (match_dup 0)
(and:SI (match_dup 1) (match_dup 2)))])]
"operands[2]
- = GEN_INT (trunc_int_for_mode (INTVAL (operands[2])
- & GET_MODE_MASK (GET_MODE (operands[0])),
- SImode));
+ = gen_int_mode (INTVAL (operands[2])
+ & GET_MODE_MASK (GET_MODE (operands[0])),
+ SImode);
operands[0] = gen_lowpart (SImode, operands[0]);
operands[1] = gen_lowpart (SImode, operands[1]);")
+; Don't promote the QImode tests, as i386 doesn't have encoding of
+; the TEST instruction with 32-bit sign-extended immediate and thus
+; the instruction size would at least double, which is not what we
+; want even with ! optimize_size.
(define_split
[(set (reg 17)
- (compare (and (match_operand 0 "aligned_operand" "")
- (match_operand 1 "const_int_operand" ""))
+ (compare (and (match_operand:HI 0 "aligned_operand" "")
+ (match_operand:HI 1 "const_int_operand" ""))
(const_int 0)))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
- && ix86_match_ccmode (insn, CCNOmode)
- && (GET_MODE (operands[0]) == HImode
- || (GET_MODE (operands[0]) == QImode
- && (TARGET_PROMOTE_QImode || optimize_size)))"
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)
+ && ! TARGET_FAST_PREFIX
+ && ! optimize_size"
[(set (reg:CCNO 17)
(compare:CCNO (and:SI (match_dup 0) (match_dup 1))
(const_int 0)))]
"operands[1]
- = GEN_INT (trunc_int_for_mode (INTVAL (operands[1])
- & GET_MODE_MASK (GET_MODE (operands[0])),
- SImode));
+ = gen_int_mode (INTVAL (operands[1])
+ & GET_MODE_MASK (GET_MODE (operands[0])),
+ SImode);
operands[0] = gen_lowpart (SImode, operands[0]);")
(define_split
@@ -17152,7 +17141,8 @@
(const_int 0)))]
"ix86_match_ccmode (insn, CCNOmode)
&& (true_regnum (operands[0]) != 0
- || CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K'))
+ || (GET_CODE (operands[1]) == CONST_INT
+ && CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K')))
&& find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
[(parallel
[(set (reg:CCNO 17)
@@ -17375,7 +17365,7 @@
[(set (match_operand:SI 0 "register_operand" "")
(subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "")
(match_operand:DI 2 "const_int_operand" "")) 0))]
- "exact_log2 (INTVAL (operands[1])) >= 0
+ "exact_log2 (INTVAL (operands[2])) >= 0
&& REGNO (operands[0]) == REGNO (operands[1])
&& peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
@@ -17853,52 +17843,92 @@
;; Moves for SSE/MMX regs.
(define_insn "movv4sf_internal"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF")])
(define_insn "movv4si_internal"
- [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
+ [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "movv2di_internal"
+ [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE"
+ ;; @@@ let's try to use movaps here.
+ "@
+ pxor\t%0, %0
+ movdqa\t{%1, %0|%0, %1}
+ movdqa\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF")])
(define_insn "movv8qi_internal"
- [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_MMX"
- "movq\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxmov")
+ (set_attr "mode" "DI")])
(define_insn "movv4hi_internal"
- [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_MMX"
- "movq\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxmov")
+ (set_attr "mode" "DI")])
(define_insn "movv2si_internal"
- [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_MMX"
- "movq\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "movv2sf_internal"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_3DNOW"
- "movq\\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_3DNOW
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_expand "movti"
- [(set (match_operand:TI 0 "general_operand" "")
- (match_operand:TI 1 "general_operand" ""))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "nonimmediate_operand" ""))]
"TARGET_SSE || TARGET_64BIT"
{
if (TARGET_64BIT)
@@ -17908,9 +17938,72 @@
DONE;
})
+(define_insn "movv2df_internal"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ xorpd\t%0, %0
+ movapd\t{%1, %0|%0, %1}
+ movapd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "movv8hi_internal"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "movv16qi_internal"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "movv2df"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE2"
+{
+ ix86_expand_vector_move (V2DFmode, operands);
+ DONE;
+})
+
+(define_expand "movv8hi"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE2"
+{
+ ix86_expand_vector_move (V8HImode, operands);
+ DONE;
+})
+
+(define_expand "movv16qi"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
+ (match_operand:V16QI 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE2"
+{
+ ix86_expand_vector_move (V16QImode, operands);
+ DONE;
+})
+
(define_expand "movv4sf"
- [(set (match_operand:V4SF 0 "general_operand" "")
- (match_operand:V4SF 1 "general_operand" ""))]
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (match_operand:V4SF 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move (V4SFmode, operands);
@@ -17918,17 +18011,26 @@
})
(define_expand "movv4si"
- [(set (match_operand:V4SI 0 "general_operand" "")
- (match_operand:V4SI 1 "general_operand" ""))]
- "TARGET_MMX"
+ [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
+ (match_operand:V4SI 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
{
ix86_expand_vector_move (V4SImode, operands);
DONE;
})
+(define_expand "movv2di"
+ [(set (match_operand:V2DI 0 "nonimmediate_operand" "")
+ (match_operand:V2DI 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move (V2DImode, operands);
+ DONE;
+})
+
(define_expand "movv2si"
- [(set (match_operand:V2SI 0 "general_operand" "")
- (match_operand:V2SI 1 "general_operand" ""))]
+ [(set (match_operand:V2SI 0 "nonimmediate_operand" "")
+ (match_operand:V2SI 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (V2SImode, operands);
@@ -17936,8 +18038,8 @@
})
(define_expand "movv4hi"
- [(set (match_operand:V4HI 0 "general_operand" "")
- (match_operand:V4HI 1 "general_operand" ""))]
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "")
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (V4HImode, operands);
@@ -17945,8 +18047,8 @@
})
(define_expand "movv8qi"
- [(set (match_operand:V8QI 0 "general_operand" "")
- (match_operand:V8QI 1 "general_operand" ""))]
+ [(set (match_operand:V8QI 0 "nonimmediate_operand" "")
+ (match_operand:V8QI 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (V8QImode, operands);
@@ -17954,14 +18056,97 @@
})
(define_expand "movv2sf"
- [(set (match_operand:V2SF 0 "general_operand" "")
- (match_operand:V2SF 1 "general_operand" ""))]
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" ""))]
"TARGET_3DNOW"
{
ix86_expand_vector_move (V2SFmode, operands);
DONE;
})
+(define_insn "*pushv2df"
+ [(set (match_operand:V2DF 0 "push_operand" "=<")
+ (match_operand:V2DF 1 "register_operand" "x"))]
+ "TARGET_SSE"
+ "#")
+
+(define_insn "*pushv2di"
+ [(set (match_operand:V2DI 0 "push_operand" "=<")
+ (match_operand:V2DI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv8hi"
+ [(set (match_operand:V8HI 0 "push_operand" "=<")
+ (match_operand:V8HI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv16qi"
+ [(set (match_operand:V16QI 0 "push_operand" "=<")
+ (match_operand:V16QI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv4sf"
+ [(set (match_operand:V4SF 0 "push_operand" "=<")
+ (match_operand:V4SF 1 "register_operand" "x"))]
+ "TARGET_SSE"
+ "#")
+
+(define_insn "*pushv4si"
+ [(set (match_operand:V4SI 0 "push_operand" "=<")
+ (match_operand:V4SI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv2si"
+ [(set (match_operand:V2SI 0 "push_operand" "=<")
+ (match_operand:V2SI 1 "register_operand" "y"))]
+ "TARGET_MMX"
+ "#")
+
+(define_insn "*pushv4hi"
+ [(set (match_operand:V4HI 0 "push_operand" "=<")
+ (match_operand:V4HI 1 "register_operand" "y"))]
+ "TARGET_MMX"
+ "#")
+
+(define_insn "*pushv8qi"
+ [(set (match_operand:V8QI 0 "push_operand" "=<")
+ (match_operand:V8QI 1 "register_operand" "y"))]
+ "TARGET_MMX"
+ "#")
+
+(define_insn "*pushv2sf"
+ [(set (match_operand:V2SF 0 "push_operand" "=<")
+ (match_operand:V2SF 1 "register_operand" "y"))]
+ "TARGET_3DNOW"
+ "#")
+
+(define_split
+ [(set (match_operand 0 "push_operand" "")
+ (match_operand 1 "register_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 3)))
+ (set (match_dup 2) (match_dup 1))]
+ "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
+ stack_pointer_rtx);
+ operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
+
+(define_split
+ [(set (match_operand 0 "push_operand" "")
+ (match_operand 1 "register_operand" ""))]
+ "TARGET_64BIT && reload_completed
+ && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
+ [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 3)))
+ (set (match_dup 2) (match_dup 1))]
+ "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
+ stack_pointer_rtx);
+ operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
+
+
(define_insn_and_split "*pushti"
[(set (match_operand:TI 0 "push_operand" "=<")
(match_operand:TI 1 "nonmemory_operand" "x"))]
@@ -17971,7 +18156,51 @@
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
(set (mem:TI (reg:SI 7)) (match_dup 1))]
""
- [(set_attr "type" "sse")])
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "*pushv2df"
+ [(set (match_operand:V2DF 0 "push_operand" "=<")
+ (match_operand:V2DF 1 "nonmemory_operand" "x"))]
+ "TARGET_SSE2"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+ (set (mem:V2DF (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "*pushv2di"
+ [(set (match_operand:V2DI 0 "push_operand" "=<")
+ (match_operand:V2DI 1 "nonmemory_operand" "x"))]
+ "TARGET_SSE2"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+ (set (mem:V2DI (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "*pushv8hi"
+ [(set (match_operand:V8HI 0 "push_operand" "=<")
+ (match_operand:V8HI 1 "nonmemory_operand" "x"))]
+ "TARGET_SSE2"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+ (set (mem:V8HI (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "multi")])
+
+(define_insn_and_split "*pushv16qi"
+ [(set (match_operand:V16QI 0 "push_operand" "=<")
+ (match_operand:V16QI 1 "nonmemory_operand" "x"))]
+ "TARGET_SSE2"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+ (set (mem:V16QI (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "multi")])
(define_insn_and_split "*pushv4sf"
[(set (match_operand:V4SF 0 "push_operand" "=<")
@@ -17982,7 +18211,7 @@
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
(set (mem:V4SF (reg:SI 7)) (match_dup 1))]
""
- [(set_attr "type" "sse")])
+ [(set_attr "type" "multi")])
(define_insn_and_split "*pushv4si"
[(set (match_operand:V4SI 0 "push_operand" "=<")
@@ -17993,7 +18222,7 @@
[(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
(set (mem:V4SI (reg:SI 7)) (match_dup 1))]
""
- [(set_attr "type" "sse")])
+ [(set_attr "type" "multi")])
(define_insn_and_split "*pushv2si"
[(set (match_operand:V2SI 0 "push_operand" "=<")
@@ -18041,17 +18270,19 @@
(define_insn "movti_internal"
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
- (match_operand:TI 1 "general_operand" "O,xm,x"))]
- "TARGET_SSE && !TARGET_64BIT"
+ (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE && !TARGET_64BIT
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
xorps\t%0, %0
movaps\t{%1, %0|%0, %1}
movaps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssemov,ssemov,ssemov")
+ (set_attr "mode" "V4SF")])
(define_insn "*movti_rex64"
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
- (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
+ (match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
@@ -18060,8 +18291,8 @@
xorps\t%0, %0
movaps\\t{%1, %0|%0, %1}
movaps\\t{%1, %0|%0, %1}"
- [(set_attr "type" "*,*,sse,sse,sse")
- (set_attr "mode" "TI")])
+ [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
+ (set_attr "mode" "V4SF")])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand" "")
@@ -18073,74 +18304,116 @@
;; These two patterns are useful for specifying exactly whether to use
;; movaps or movups
-(define_insn "sse_movaps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (unspec:V4SF
- [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))]
+(define_expand "sse_movaps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
+ UNSPEC_MOVA))]
"TARGET_SSE"
- "@
- movaps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ {
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse_movaps (tmp, operands[1]));
+ emit_move_insn (operands[0], tmp);
+ DONE;
+ }
+})
-(define_insn "sse_movups"
+(define_insn "*sse_movaps_1"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (unspec:V4SF
- [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))]
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVA))]
+ "TARGET_SSE
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movaps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov,ssemov")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "sse_movups"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
+ UNSPEC_MOVU))]
"TARGET_SSE"
- "@
- movups\t{%1, %0|%0, %1}
- movups\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ {
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse_movups (tmp, operands[1]));
+ emit_move_insn (operands[0], tmp);
+ DONE;
+ }
+})
+(define_insn "*sse_movups_1"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movups\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt,ssecvt")
+ (set_attr "mode" "V4SF")])
;; SSE Strange Moves.
(define_insn "sse_movmskps"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))]
+ (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
"TARGET_SSE"
"movmskps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
(define_insn "mmx_pmovmskb"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))]
+ (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+ UNSPEC_MOVMSK))]
"TARGET_SSE || TARGET_3DNOW_A"
"pmovmskb\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
(define_insn "mmx_maskmovq"
[(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")] 32))]
+ (match_operand:V8QI 2 "register_operand" "y")]
+ UNSPEC_MASKMOV))]
"(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT"
;; @@@ check ordering of operands in intel/nonintel syntax
"maskmovq\t{%2, %1|%1, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "mmx_maskmovq_rex"
[(set (mem:V8QI (match_operand:DI 0 "register_operand" "D"))
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")] 32))]
+ (match_operand:V8QI 2 "register_operand" "y")]
+ UNSPEC_MASKMOV))]
"(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT"
;; @@@ check ordering of operands in intel/nonintel syntax
"maskmovq\t{%2, %1|%1, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "sse_movntv4sf"
[(set (match_operand:V4SF 0 "memory_operand" "=m")
- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))]
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
"TARGET_SSE"
"movntps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF")])
(define_insn "sse_movntdi"
[(set (match_operand:DI 0 "memory_operand" "=m")
- (unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))]
+ (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
+ UNSPEC_MOVNT))]
"TARGET_SSE || TARGET_3DNOW_A"
"movntq\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxmov")
+ (set_attr "mode" "DI")])
(define_insn "sse_movhlps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18154,7 +18427,8 @@
(const_int 3)))]
"TARGET_SSE"
"movhlps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
(define_insn "sse_movlhps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18168,7 +18442,8 @@
(const_int 12)))]
"TARGET_SSE"
"movlhps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
(define_insn "sse_movhps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
@@ -18179,7 +18454,8 @@
"TARGET_SSE
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movhps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
(define_insn "sse_movlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
@@ -18190,17 +18466,29 @@
"TARGET_SSE
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movlps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
-(define_insn "sse_loadss"
+(define_expand "sse_loadss"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "memory_operand" "")]
+ "TARGET_SSE"
+{
+ emit_insn (gen_sse_loadss_1 (operands[0], operands[1],
+ CONST0_RTX (V4SFmode)));
+ DONE;
+})
+
+(define_insn "sse_loadss_1"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (match_operand:V4SF 1 "memory_operand" "m")
- (vec_duplicate:V4SF (float:SF (const_int 0)))
+ (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m"))
+ (match_operand:V4SF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE"
"movss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
(define_insn "sse_movss"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18210,7 +18498,8 @@
(const_int 1)))]
"TARGET_SSE"
"movss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
(define_insn "sse_storess"
[(set (match_operand:SF 0 "memory_operand" "=m")
@@ -18219,17 +18508,20 @@
(parallel [(const_int 0)])))]
"TARGET_SSE"
"movss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
(define_insn "sse_shufps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
- (match_operand:SI 3 "immediate_operand" "i")] 41))]
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_SHUFFLE))]
"TARGET_SSE"
;; @@@ check operand order for intel/nonintel syntax
"shufps\t{%3, %2, %0|%0, %2, %3}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
;; SSE arithmetic
@@ -18240,7 +18532,8 @@
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"addps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
(define_insn "vmaddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18251,7 +18544,8 @@
(const_int 1)))]
"TARGET_SSE"
"addss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
(define_insn "subv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18259,7 +18553,8 @@
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"subps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
(define_insn "vmsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18270,7 +18565,8 @@
(const_int 1)))]
"TARGET_SSE"
"subss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "SF")])
(define_insn "mulv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18278,7 +18574,8 @@
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"mulps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "V4SF")])
(define_insn "vmmulv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18289,7 +18586,8 @@
(const_int 1)))]
"TARGET_SSE"
"mulss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "SF")])
(define_insn "divv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18297,7 +18595,8 @@
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"divps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "V4SF")])
(define_insn "vmdivv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18308,7 +18607,8 @@
(const_int 1)))]
"TARGET_SSE"
"divss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "SF")])
;; SSE square root/reciprocal
@@ -18316,45 +18616,52 @@
(define_insn "rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
- [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
"TARGET_SSE"
"rcpps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
(define_insn "vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP)
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE"
"rcpss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
(define_insn "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
- [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
"TARGET_SSE"
"rsqrtps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
(define_insn "vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT)
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE"
"rsqrtss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
(define_insn "sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"sqrtps\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
(define_insn "vmsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18364,206 +18671,358 @@
(const_int 1)))]
"TARGET_SSE"
"sqrtss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
;; SSE logical operations.
+;; SSE defines logical operations on floating point values. This brings
+;; interesting challenge to RTL representation where logicals are only valid
+;; on integral types. We deal with this by representing the floating point
+;; logical as logical on arguments casted to TImode as this is what hardware
+;; really does. Unfortunately hardware requires the type information to be
+;; present and thus we must avoid subregs from being simplified and elliminated
+;; in later compilation phases.
+;;
+;; We have following variants from each instruction:
+;; sse_andsf3 - the operation taking V4SF vector operands
+;; and doing TImode cast on them
+;; *sse_andsf3_memory - the operation taking one memory operand casted to
+;; TImode, since backend insist on elliminating casts
+;; on memory operands
+;; sse_andti3_sf_1 - the operation taking SF scalar operands.
+;; We can not accept memory operand here as instruction reads
+;; whole scalar. This is generated only post reload by GCC
+;; scalar float operations that expands to logicals (fabs)
+;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode
+;; memory operand. Eventually combine can be able
+;; to synthetize these using splitter.
+;; sse2_anddf3, *sse2_anddf3_memory
+;;
+;;
;; These are not called andti3 etc. because we really really don't want
;; the compiler to widen DImode ands to TImode ands and then try to move
;; into DImode subregs of SSE registers, and them together, and move out
;; of DImode subregs again!
+;; SSE1 single precision floating point logical operation
+(define_expand "sse_andv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
+ (and:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE"
+ "")
-(define_insn "*sse_andti3_df_1"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
- "TARGET_SSE2"
- "andpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
-
-(define_insn "*sse_andti3_df_2"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
- (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
- "TARGET_SSE2"
- "andpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+(define_insn "*sse_andv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "andps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
-(define_insn "*sse_andti3_sf_1"
+(define_insn "*sse_andsf3"
[(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
- "TARGET_SSE"
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"andps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "sse_nandv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
+ (and:TI (not:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0))
+ (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE"
+ "")
+
+(define_insn "*sse_nandv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "andnps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
-(define_insn "*sse_andti3_sf_2"
+(define_insn "*sse_nandsf3"
[(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
- (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
- "andps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "andnps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
-(define_insn "sse_andti3"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+(define_expand "sse_iorv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
+ (ior:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE"
+ "")
+
+(define_insn "*sse_iorv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
+ (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && !TARGET_SSE2
+ "TARGET_SSE
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "andps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "orps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
-(define_insn "*sse_andti3_sse2"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+(define_insn "*sse_iorsf3"
+ [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
+ (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2
+ "TARGET_SSE
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "pand\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "orps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
-(define_insn "*sse_nandti3_df"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0))
- (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
- "TARGET_SSE2"
- "andnpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+(define_expand "sse_xorv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "") 0)
+ (xor:TI (subreg:TI (match_operand:V4SF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V4SF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "")
+
+(define_insn "*sse_xorv4sf3"
+ [(set (subreg:TI (match_operand:V4SF 0 "register_operand" "=x") 0)
+ (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xorps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
-(define_insn "*sse_nandti3_sf"
+(define_insn "*sse_xorsf3"
[(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (and:TI (not:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0))
+ (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "andnps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "TARGET_SSE
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xorps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
-(define_insn "sse_nandti3"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+;; SSE2 double precision floating point logical operation
+
+(define_expand "sse2_andv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
+ (and:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE2"
+ "")
+
+(define_insn "*sse2_andv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && !TARGET_SSE2"
- "andnps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "andpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
-(define_insn "*sse_nandti3_sse2"
- [(set (match_operand:TI 0 "register_operand" "=x")
- (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+(define_insn "*sse2_andv2df3"
+ [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0)
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "andpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "sse2_nandv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
+ (and:TI (not:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0))
+ (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
"TARGET_SSE2"
- "pnand\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "")
-(define_insn "*sse_iorti3_df_1"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
+(define_insn "*sse2_nandv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
- "orpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "andnpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
-(define_insn "*sse_iorti3_df_2"
+(define_insn "*sse_nandti3_df"
[(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
(match_operand:TI 2 "nonimmediate_operand" "Ym")))]
"TARGET_SSE2"
- "orpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
-
-(define_insn "*sse_iorti3_sf_1"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
- "TARGET_SSE"
- "orps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "andnpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
-(define_insn "*sse_iorti3_sf_2"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
- (match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "orps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+(define_expand "sse2_iorv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
+ (ior:TI (subreg:TI (match_operand:V2DF 1 "register_operand" "") 0)
+ (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
+ "TARGET_SSE2"
+ "")
-(define_insn "sse_iorti3"
- [(set (match_operand:TI 0 "register_operand" "=x")
+(define_insn "*sse2_iorv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
(ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && !TARGET_SSE2
+ "TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "orps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "orpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
-(define_insn "*sse_iorti3_sse2"
- [(set (match_operand:TI 0 "register_operand" "=x")
+(define_insn "*sse2_iordf3"
+ [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0)
(ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "por\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "orpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
-(define_insn "*sse_xorti3_df_1"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
+(define_expand "sse2_xorv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "") 0)
+ (xor:TI (subreg:TI (match_operand:V2DF 1 "nonimmediate_operand" "") 0)
+ (subreg:TI (match_operand:V2DF 2 "nonimmediate_operand" "") 0)))]
"TARGET_SSE2"
+ "")
+
+(define_insn "*sse2_xorv2df3"
+ [(set (subreg:TI (match_operand:V2DF 0 "register_operand" "=x") 0)
+ (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"xorpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
-(define_insn "*sse_xorti3_df_2"
- [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
- (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
- (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
- "TARGET_SSE2"
+(define_insn "*sse2_xordf3"
+ [(set (subreg:TI (match_operand:DF 0 "register_operand" "=x") 0)
+ (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"xorpd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
-(define_insn "*sse_xorti3_sf_1"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
- (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
- "TARGET_SSE"
- "xorps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+;; SSE2 integral logicals. These patterns must always come after floating
+;; point ones since we don't want compiler to use integer opcodes on floating
+;; point SSE values to avoid matching of subregs in the match_operand.
+(define_insn "*sse2_andti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "pand\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
-(define_insn "*sse_xorti3_sf_2"
- [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
- (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
+(define_insn "sse2_andv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "pand\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_nandti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE"
- "xorps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "TARGET_SSE2"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
-(define_insn "sse_xorti3"
+(define_insn "sse2_nandv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0"))
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_iorti3"
[(set (match_operand:TI 0 "register_operand" "=x")
- (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
+ (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
- "TARGET_SSE && !TARGET_SSE2
+ "TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
- "xorps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ "por\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_iorv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "por\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
-(define_insn "*sse_xorti3_sse2"
+(define_insn "*sse2_xorti3"
[(set (match_operand:TI 0 "register_operand" "=x")
(xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
(match_operand:TI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
"pxor\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_xorv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (xor:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "pxor\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "TI")])
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
(define_insn "sse_clrv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(const_int 0)] 45))]
+ (unspec:V4SF [(const_int 0)] UNSPEC_NOP))]
"TARGET_SSE"
"xorps\t{%0, %0|%0, %0}"
- [(set_attr "type" "sse")
- (set_attr "memory" "none")])
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none")
+ (set_attr "mode" "V4SF")])
+
+;; Use xor, but don't show input operands so they aren't live before
+;; this insn.
+(define_insn "sse_clrv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(const_int 0)] UNSPEC_NOP))]
+ "TARGET_SSE2"
+ "xorpd\t{%0, %0|%0, %0}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none")
+ (set_attr "mode" "V4SF")])
;; SSE mask-generating compares
@@ -18574,7 +19033,8 @@
(match_operand:V4SF 2 "register_operand" "x")]))]
"TARGET_SSE"
"cmp%D3ps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "V4SF")])
(define_insn "maskncmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
@@ -18589,7 +19049,8 @@
else
return "cmpn%D3ps\t{%2, %0|%0, %2}";
}
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "V4SF")])
(define_insn "vmmaskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
@@ -18597,11 +19058,12 @@
(match_operator:V4SI 3 "sse_comparison_operator"
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "register_operand" "x")])
- (match_dup 1)
+ (subreg:V4SI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE"
"cmp%D3ss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "SF")])
(define_insn "vmmaskncmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
@@ -18619,33 +19081,34 @@
else
return "cmpn%D3ss\t{%2, %0|%0, %2}";
}
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "SF")])
(define_insn "sse_comi"
[(set (reg:CCFP 17)
- (match_operator:CCFP 2 "sse_comparison_operator"
- [(vec_select:SF
- (match_operand:V4SF 0 "register_operand" "x")
- (parallel [(const_int 0)]))
- (vec_select:SF
- (match_operand:V4SF 1 "register_operand" "x")
- (parallel [(const_int 0)]))]))]
+ (compare:CCFP (vec_select:SF
+ (match_operand:V4SF 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))))]
"TARGET_SSE"
"comiss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "SF")])
(define_insn "sse_ucomi"
[(set (reg:CCFPU 17)
- (match_operator:CCFPU 2 "sse_comparison_operator"
- [(vec_select:SF
- (match_operand:V4SF 0 "register_operand" "x")
- (parallel [(const_int 0)]))
- (vec_select:SF
- (match_operand:V4SF 1 "register_operand" "x")
- (parallel [(const_int 0)]))]))]
+ (compare:CCFPU (vec_select:SF
+ (match_operand:V4SF 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))))]
"TARGET_SSE"
"ucomiss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "SF")])
;; SSE unpack
@@ -18666,7 +19129,8 @@
(const_int 5)))]
"TARGET_SSE"
"unpckhps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
(define_insn "sse_unpcklps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18684,7 +19148,8 @@
(const_int 5)))]
"TARGET_SSE"
"unpcklps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
;; SSE min/max
@@ -18695,7 +19160,8 @@
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"maxps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
(define_insn "vmsmaxv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18706,7 +19172,8 @@
(const_int 1)))]
"TARGET_SSE"
"maxss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
(define_insn "sminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18714,7 +19181,8 @@
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"minps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V4SF")])
(define_insn "vmsminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18725,8 +19193,8 @@
(const_int 1)))]
"TARGET_SSE"
"minss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
-
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
;; SSE <-> integer/MMX conversions
@@ -18739,7 +19207,8 @@
(const_int 12)))]
"TARGET_SSE"
"cvtpi2ps\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
(define_insn "cvtps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -18748,16 +19217,19 @@
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvtps2pi\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
(define_insn "cvttps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(vec_select:V2SI
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX)
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvttps2pi\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
(define_insn "cvtsi2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x")
@@ -18768,7 +19240,21 @@
(const_int 14)))]
"TARGET_SSE"
"cvtsi2ss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
+(define_insn "cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (const_int 14)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,vector")
+ (set_attr "mode" "SF")])
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -18777,16 +19263,42 @@
(parallel [(const_int 0)])))]
"TARGET_SSE"
"cvtss2si\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
+(define_insn "cvtss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (vec_select:DI
+ (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "cvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,vector")
+ (set_attr "mode" "SF")])
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX)
(parallel [(const_int 0)])))]
"TARGET_SSE"
"cvttss2si\t{%1, %0|%0, %1}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
+(define_insn "cvttss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (vec_select:DI
+ (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
+ UNSPEC_FIX)
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvttss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "vector,vector")])
;; MMX insns
@@ -18795,59 +19307,77 @@
(define_insn "addv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
- (plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "addv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
- (plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "addv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+ (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0")
(match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI
+ [(plus:DI (match_operand:DI 1 "register_operand" "%0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
+ "TARGET_MMX"
+ "paddq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "ssaddv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
- (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddsb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "ssaddv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
- (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddsw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "usaddv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
- (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddusb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "usaddv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
- (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddusw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "subv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
@@ -18855,7 +19385,8 @@
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"psubb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "subv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -18863,7 +19394,8 @@
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"psubw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "subv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -18871,7 +19403,19 @@
(match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"psubd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI
+ [(minus:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
+ "TARGET_MMX"
+ "psubq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "sssubv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
@@ -18879,7 +19423,8 @@
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"psubsb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "sssubv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -18887,7 +19432,8 @@
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"psubsw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "ussubv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
@@ -18895,7 +19441,8 @@
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"psubusb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "ussubv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -18903,7 +19450,8 @@
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"psubusw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "mulv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -18911,7 +19459,8 @@
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"pmullw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
(define_insn "smulv4hi3_highpart"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -18924,7 +19473,8 @@
(const_int 16))))]
"TARGET_MMX"
"pmulhw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
(define_insn "umulv4hi3_highpart"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -18937,7 +19487,8 @@
(const_int 16))))]
"TARGET_SSE || TARGET_3DNOW_A"
"pmulhuw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
(define_insn "mmx_pmaddwd"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -18958,7 +19509,8 @@
(const_int 3)]))))))]
"TARGET_MMX"
"pmaddwd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
;; MMX logical operations
@@ -18968,49 +19520,58 @@
(define_insn "mmx_iordi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
- [(ior:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+ [(ior:DI (match_operand:DI 1 "register_operand" "%0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
"TARGET_MMX"
"por\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "mmx_xordi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
- [(xor:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+ [(xor:DI (match_operand:DI 1 "register_operand" "%0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
"TARGET_MMX"
"pxor\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")
(set_attr "memory" "none")])
;; Same as pxor, but don't show input operands so that we don't think
;; they are live.
(define_insn "mmx_clrdi"
[(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(const_int 0)] 45))]
+ (unspec:DI [(const_int 0)] UNSPEC_NOP))]
"TARGET_MMX"
"pxor\t{%0, %0|%0, %0}"
- [(set_attr "type" "mmx")
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")
(set_attr "memory" "none")])
(define_insn "mmx_anddi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
- [(and:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+ [(and:DI (match_operand:DI 1 "register_operand" "%0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
"TARGET_MMX"
"pand\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "mmx_nanddi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
[(and:DI (not:DI (match_operand:DI 1 "register_operand" "0"))
- (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
"TARGET_MMX"
"pandn\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
;; MMX unsigned averages/sum of absolute differences
@@ -19032,7 +19593,8 @@
(const_int 1)))]
"TARGET_SSE || TARGET_3DNOW_A"
"pavgb\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "mmx_uavgv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19047,15 +19609,18 @@
(const_int 1)))]
"TARGET_SSE || TARGET_3DNOW_A"
"pavgw\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "mmx_psadbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))]
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSADBW))]
"TARGET_SSE || TARGET_3DNOW_A"
"psadbw\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
;; MMX insert/extract/shuffle
@@ -19068,7 +19633,8 @@
(match_operand:SI 3 "immediate_operand" "i")))]
"TARGET_SSE || TARGET_3DNOW_A"
"pinsrw\t{%3, %2, %0|%0, %2, %3}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "mmx_pextrw"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -19077,15 +19643,18 @@
[(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_SSE || TARGET_3DNOW_A"
"pextrw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "mmx_pshufw"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0")
- (match_operand:SI 2 "immediate_operand" "i")] 41))]
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_SHUFFLE))]
"TARGET_SSE || TARGET_3DNOW_A"
"pshufw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
;; MMX mask-generating comparisons
@@ -19096,7 +19665,8 @@
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"pcmpeqb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
(define_insn "eqv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19104,7 +19674,8 @@
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"pcmpeqw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
(define_insn "eqv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19112,7 +19683,8 @@
(match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"pcmpeqd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
(define_insn "gtv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
@@ -19120,7 +19692,8 @@
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"pcmpgtb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
(define_insn "gtv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19128,7 +19701,8 @@
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"pcmpgtw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
(define_insn "gtv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19136,7 +19710,8 @@
(match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"pcmpgtd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
;; MMX max/min insns
@@ -19147,7 +19722,8 @@
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_SSE || TARGET_3DNOW_A"
"pmaxub\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "smaxv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19155,7 +19731,8 @@
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_SSE || TARGET_3DNOW_A"
"pmaxsw\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "uminv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
@@ -19163,7 +19740,8 @@
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_SSE || TARGET_3DNOW_A"
"pminub\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
(define_insn "sminv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19171,7 +19749,8 @@
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_SSE || TARGET_3DNOW_A"
"pminsw\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
;; MMX shifts
@@ -19182,7 +19761,8 @@
(match_operand:DI 2 "nonmemory_operand" "yi")))]
"TARGET_MMX"
"psraw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "ashrv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19190,7 +19770,8 @@
(match_operand:DI 2 "nonmemory_operand" "yi")))]
"TARGET_MMX"
"psrad\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "lshrv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19198,7 +19779,8 @@
(match_operand:DI 2 "nonmemory_operand" "yi")))]
"TARGET_MMX"
"psrlw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "lshrv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19206,17 +19788,20 @@
(match_operand:DI 2 "nonmemory_operand" "yi")))]
"TARGET_MMX"
"psrld\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
;; See logical MMX insns.
(define_insn "mmx_lshrdi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
[(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))]
+ (match_operand:DI 2 "nonmemory_operand" "yi"))]
+ UNSPEC_NOP))]
"TARGET_MMX"
"psrlq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "ashlv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19224,7 +19809,8 @@
(match_operand:DI 2 "nonmemory_operand" "yi")))]
"TARGET_MMX"
"psllw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "ashlv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19232,17 +19818,20 @@
(match_operand:DI 2 "nonmemory_operand" "yi")))]
"TARGET_MMX"
"pslld\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
;; See logical MMX insns.
(define_insn "mmx_ashldi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
[(ashift:DI (match_operand:DI 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))]
+ (match_operand:DI 2 "nonmemory_operand" "yi"))]
+ UNSPEC_NOP))]
"TARGET_MMX"
"psllq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
;; MMX pack/unpack insns.
@@ -19254,7 +19843,8 @@
(ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
"TARGET_MMX"
"packsswb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "mmx_packssdw"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19263,7 +19853,8 @@
(ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
"TARGET_MMX"
"packssdw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "mmx_packuswb"
[(set (match_operand:V8QI 0 "register_operand" "=y")
@@ -19272,7 +19863,8 @@
(us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
"TARGET_MMX"
"packuswb\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
(define_insn "mmx_punpckhbw"
[(set (match_operand:V8QI 0 "register_operand" "=y")
@@ -19298,7 +19890,8 @@
(const_int 85)))]
"TARGET_MMX"
"punpckhbw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "mmx_punpckhwd"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19316,21 +19909,21 @@
(const_int 5)))]
"TARGET_MMX"
"punpckhwd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "mmx_punpckhdq"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(vec_merge:V2SI
- (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
- (parallel [(const_int 0)
- (const_int 1)]))
+ (match_operand:V2SI 1 "register_operand" "0")
(vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
(parallel [(const_int 1)
(const_int 0)]))
(const_int 1)))]
"TARGET_MMX"
"punpckhdq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "mmx_punpcklbw"
[(set (match_operand:V8QI 0 "register_operand" "=y")
@@ -19356,7 +19949,8 @@
(const_int 85)))]
"TARGET_MMX"
"punpcklbw\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "mmx_punpcklwd"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19374,7 +19968,8 @@
(const_int 5)))]
"TARGET_MMX"
"punpcklwd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
(define_insn "mmx_punpckldq"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19382,19 +19977,18 @@
(vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
(parallel [(const_int 1)
(const_int 0)]))
- (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 1)]))
+ (match_operand:V2SI 2 "register_operand" "y")
(const_int 1)))]
"TARGET_MMX"
"punpckldq\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
;; Miscellaneous stuff
(define_insn "emms"
- [(unspec_volatile [(const_int 0)] 31)
+ [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)
(clobber (reg:XF 8))
(clobber (reg:XF 9))
(clobber (reg:XF 10))
@@ -19417,23 +20011,24 @@
(set_attr "memory" "unknown")])
(define_insn "ldmxcsr"
- [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)]
- "TARGET_MMX"
+ [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
+ UNSPECV_LDMXCSR)]
+ "TARGET_SSE"
"ldmxcsr\t%0"
- [(set_attr "type" "mmx")
+ [(set_attr "type" "sse")
(set_attr "memory" "load")])
(define_insn "stmxcsr"
[(set (match_operand:SI 0 "memory_operand" "=m")
- (unspec_volatile:SI [(const_int 0)] 40))]
- "TARGET_MMX"
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
+ "TARGET_SSE"
"stmxcsr\t%0"
- [(set_attr "type" "mmx")
+ [(set_attr "type" "sse")
(set_attr "memory" "store")])
(define_expand "sfence"
[(set (match_dup 0)
- (unspec:BLK [(match_dup 0)] 44))]
+ (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
"TARGET_SSE || TARGET_3DNOW_A"
{
operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
@@ -19442,7 +20037,7 @@
(define_insn "*sfence_insn"
[(set (match_operand:BLK 0 "" "")
- (unspec:BLK [(match_dup 0)] 44))]
+ (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
"TARGET_SSE || TARGET_3DNOW_A"
"sfence"
[(set_attr "type" "sse")
@@ -19457,7 +20052,7 @@
(reg:DI 25)
(reg:DI 26)
(reg:DI 27)
- (reg:DI 28)] 13))
+ (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
(use (match_operand:DI 1 "register_operand" ""))
(use (match_operand:DI 2 "immediate_operand" ""))
(use (label_ref:DI (match_operand 3 "" "")))])]
@@ -19474,7 +20069,7 @@
(reg:DI 25)
(reg:DI 26)
(reg:DI 27)
- (reg:DI 28)] 13))
+ (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
(use (match_operand:DI 1 "register_operand" "r"))
(use (match_operand:DI 2 "const_int_operand" "i"))
(use (label_ref:DI (match_operand 3 "" "X")))]
@@ -19517,7 +20112,8 @@
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfadd\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
(define_insn "subv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19525,7 +20121,8 @@
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfsub\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
(define_insn "subrv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19533,7 +20130,8 @@
(match_operand:V2SF 1 "register_operand" "0")))]
"TARGET_3DNOW"
"pfsubr\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
(define_insn "gtv2sf3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19541,7 +20139,8 @@
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfcmpgt\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "V2SF")])
(define_insn "gev2sf3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19549,7 +20148,8 @@
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfcmpge\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "V2SF")])
(define_insn "eqv2sf3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19557,7 +20157,8 @@
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfcmpeq\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "V2SF")])
(define_insn "pfmaxv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19565,7 +20166,8 @@
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfmax\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
(define_insn "pfminv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19573,7 +20175,8 @@
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfmin\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
(define_insn "mulv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19581,10 +20184,11 @@
(match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pfmul\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "V2SF")])
(define_insn "femms"
- [(unspec_volatile [(const_int 0)] 46)
+ [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)
(clobber (reg:XF 8))
(clobber (reg:XF 9))
(clobber (reg:XF 10))
@@ -19603,14 +20207,16 @@
(clobber (reg:DI 36))]
"TARGET_3DNOW"
"femms"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "none")])
(define_insn "pf2id"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pf2id\\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
(define_insn "pf2iw"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19619,7 +20225,8 @@
(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
"TARGET_3DNOW_A"
"pf2iw\\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
(define_insn "pfacc"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19636,7 +20243,8 @@
(parallel [(const_int 1)])))))]
"TARGET_3DNOW"
"pfacc\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
(define_insn "pfnacc"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19653,7 +20261,8 @@
(parallel [(const_int 1)])))))]
"TARGET_3DNOW_A"
"pfnacc\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
(define_insn "pfpnacc"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19670,7 +20279,8 @@
(parallel [(const_int 1)])))))]
"TARGET_3DNOW_A"
"pfpnacc\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
(define_insn "pi2fw"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19686,14 +20296,16 @@
(parallel [(const_int 1)])))))))]
"TARGET_3DNOW_A"
"pi2fw\\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
(define_insn "floatv2si2"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
"TARGET_3DNOW"
"pi2fd\\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
;; This insn is identical to pavgb in operation, but the opcode is
;; different. To avoid accidentally matching pavgb, use an unspec.
@@ -19702,50 +20314,62 @@
[(set (match_operand:V8QI 0 "register_operand" "=y")
(unspec:V8QI
[(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 49))]
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PAVGUSB))]
"TARGET_3DNOW"
"pavgusb\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "TI")])
;; 3DNow reciprical and sqrt
(define_insn "pfrcpv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=y")
- (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 50))]
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCP))]
"TARGET_3DNOW"
"pfrcp\\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "TI")])
(define_insn "pfrcpit1v2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 51))]
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCPIT1))]
"TARGET_3DNOW"
"pfrcpit1\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "TI")])
(define_insn "pfrcpit2v2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 52))]
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCPIT2))]
"TARGET_3DNOW"
"pfrcpit2\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "TI")])
(define_insn "pfrsqrtv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=y")
- (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 53))]
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRSQRT))]
"TARGET_3DNOW"
- "pfrsqrt\\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ "pfrsqrt\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "TI")])
(define_insn "pfrsqit1v2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=y")
(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 54))]
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRSQIT1))]
"TARGET_3DNOW"
"pfrsqit1\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "TI")])
(define_insn "pmulhrwv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
@@ -19764,7 +20388,8 @@
(const_int 16))))]
"TARGET_3DNOW"
"pmulhrw\\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "TI")])
(define_insn "pswapdv2si2"
[(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -19772,7 +20397,8 @@
(parallel [(const_int 1) (const_int 0)])))]
"TARGET_3DNOW_A"
"pswapd\\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "TI")])
(define_insn "pswapdv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=y")
@@ -19780,7 +20406,8 @@
(parallel [(const_int 1) (const_int 0)])))]
"TARGET_3DNOW_A"
"pswapd\\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "TI")])
(define_expand "prefetch"
[(prefetch (match_operand 0 "address_operand" "")
@@ -19843,7 +20470,8 @@
return patterns[locality];
}
- [(set_attr "type" "sse")])
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")])
(define_insn "*prefetch_3dnow"
[(prefetch (match_operand:SI 0 "address_operand" "p")
@@ -19870,4 +20498,1577 @@
else
return "prefetchw\t%a0";
}
- [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "none")])
+
+;; SSE2 support
+
+(define_insn "addv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "addpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "vmaddv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "addsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_insn "subv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "subpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "vmsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "subsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_insn "mulv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "mulpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "vmmulv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "mulsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "DF")])
+
+(define_insn "divv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "divpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "vmdivv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "divsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "DF")])
+
+;; SSE min/max
+
+(define_insn "smaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "maxpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "vmsmaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "maxsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+
+(define_insn "sminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "minpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "vmsminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "minsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "DF")])
+;; SSE2 square root. There doesn't appear to be an extension for the
+;; reciprocal/rsqrt instructions if the Intel manual is to be believed.
+
+(define_insn "sqrtv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")))]
+ "TARGET_SSE2"
+ "sqrtpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "vmsqrtv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
+ (match_operand:V2DF 2 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "sqrtsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+;; SSE mask-generating compares
+
+(define_insn "maskcmpv2df3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (match_operator:V2DI 3 "sse_comparison_operator"
+ [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "x")]))]
+ "TARGET_SSE2"
+ "cmp%D3pd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "maskncmpv2df3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (not:V2DI
+ (match_operator:V2DI 3 "sse_comparison_operator"
+ [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "x")])))]
+ "TARGET_SSE2"
+{
+ if (GET_CODE (operands[3]) == UNORDERED)
+ return "cmpordps\t{%2, %0|%0, %2}";
+ else
+ return "cmpn%D3pd\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "vmmaskcmpv2df3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (match_operator:V2DI 3 "sse_comparison_operator"
+ [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "x")])
+ (subreg:V2DI (match_dup 1) 0)
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cmp%D3sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "DF")])
+
+(define_insn "vmmaskncmpv2df3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (not:V2DI
+ (match_operator:V2DI 3 "sse_comparison_operator"
+ [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "x")]))
+ (subreg:V2DI (match_dup 1) 0)
+ (const_int 1)))]
+ "TARGET_SSE2"
+{
+ if (GET_CODE (operands[3]) == UNORDERED)
+ return "cmpordsd\t{%2, %0|%0, %2}";
+ else
+ return "cmpn%D3sd\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_comi"
+ [(set (reg:CCFP 17)
+ (compare:CCFP (vec_select:DF
+ (match_operand:V2DF 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "comisd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_ucomi"
+ [(set (reg:CCFPU 17)
+ (compare:CCFPU (vec_select:DF
+ (match_operand:V2DF 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "ucomisd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "DF")])
+
+;; SSE Strange Moves.
+
+(define_insn "sse2_movmskpd"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2"
+ "movmskpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2"
+ "pmovmskb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_maskmovdqu"
+ [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "register_operand" "x")]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovdqu\t{%2, %1|%1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_maskmovdqu_rex64"
+ [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "register_operand" "x")]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovdqu\t{%2, %1|%1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movntv2df"
+ [(set (match_operand:V2DF 0 "memory_operand" "=m")
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movntpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_movntv2di"
+ [(set (match_operand:V2DI 0 "memory_operand" "=m")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movntdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movntsi"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movnti\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+;; SSE <-> integer/MMX conversions
+
+;; Conversions between SI and SF
+
+(define_insn "cvtdq2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "cvtps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "cvttps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX))]
+ "TARGET_SSE2"
+ "cvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+;; Conversions between SI and DF
+
+(define_insn "cvtdq2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float:V2DF (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel
+ [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE2"
+ "cvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "cvtpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_SSE2"
+ "cvtpd2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "cvttpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX)
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_SSE2"
+ "cvttpd2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "cvtpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvtpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "cvttpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX))]
+ "TARGET_SSE2"
+ "cvttpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "cvtpi2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+ "TARGET_SSE2"
+ "cvtpi2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+;; Conversions between SI and DF
+
+(define_insn "cvtsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "cvtsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SI")])
+
+(define_insn "cvtsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SI")])
+
+(define_insn "cvttsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+ (parallel [(const_int 0)]))] UNSPEC_FIX))]
+ "TARGET_SSE2"
+ "cvttsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SI")])
+
+(define_insn "cvttsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
+ (parallel [(const_int 0)]))] UNSPEC_FIX))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvttsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector,vector")])
+
+(define_insn "cvtsi2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (vec_duplicate:V2DF
+ (float:DF
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 2)))]
+ "TARGET_SSE2"
+ "cvtsi2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
+(define_insn "cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
+ (vec_duplicate:V2DF
+ (float:DF
+ (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (const_int 2)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsi2sdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "vector,direct")])
+
+;; Conversions between SF and DF
+
+(define_insn "cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 2 "register_operand" "xm")))
+ (const_int 14)))]
+ "TARGET_SSE2"
+ "cvtsd2ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
+(define_insn "cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "register_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)])))
+ (const_int 2)))]
+ "TARGET_SSE2"
+ "cvtss2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
+(define_insn "cvtpd2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (subreg:V4SF
+ (vec_concat:V4SI
+ (subreg:V2SI (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 0)
+ (const_vector:V2SI [(const_int 0) (const_int 0)])) 0))]
+ "TARGET_SSE2"
+ "cvtpd2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "cvtps2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float_extend:V2DF
+ (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE2"
+ "cvtps2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+;; SSE2 variants of MMX insns
+
+;; MMX arithmetic
+
+(define_insn "addv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "paddb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "addv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "paddw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "addv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "paddd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "addv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "paddq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssaddv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "paddsb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssaddv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "paddsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "usaddv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "paddusb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "usaddv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "paddusw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "subv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "subv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "subv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (minus:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "subv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (minus:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sssubv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (ss_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubsb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sssubv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ussubv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (us_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubusb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "ussubv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "psubusw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (mult:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pmullw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "TI")])
+
+(define_insn "smulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
+ (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_SSE2"
+ "pmulhw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "TI")])
+
+(define_insn "umulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI (zero_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
+ (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_SSE2"
+ "pmulhuw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_umulsidi3"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (mult:DI (zero_extend:DI (vec_select:SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (parallel [(const_int 0)])))
+ (zero_extend:DI (vec_select:SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)])))))]
+ "TARGET_SSE2"
+ "pmuludq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE2"
+ "pmuludq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSE2"
+ "pmaddwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+;; Same as pxor, but don't show input operands so that we don't think
+;; they are live.
+(define_insn "sse2_clrti"
+ [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
+ "TARGET_SSE2"
+ "pxor\t{%0, %0|%0, %0}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "memory" "none")
+ (set_attr "mode" "TI")])
+
+;; MMX unsigned averages/sum of absolute differences
+
+(define_insn "sse2_uavgv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (ashiftrt:V16QI
+ (plus:V16QI (plus:V16QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (const_vector:V16QI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "pavgb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_uavgv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ashiftrt:V8HI
+ (plus:V8HI (plus:V8HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "pavgw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+;; @@@ this isn't the right representation.
+(define_insn "sse2_psadbw"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSADBW))]
+ "TARGET_SSE2"
+ "psadbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+
+;; MMX insert/extract/shuffle
+
+(define_insn "sse2_pinsrw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (vec_duplicate:V8HI
+ (truncate:HI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (match_operand:SI 3 "immediate_operand" "i")))]
+ "TARGET_SSE2"
+ "pinsrw\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_pextrw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
+ (parallel
+ [(match_operand:SI 2 "immediate_operand" "i")]))))]
+ "TARGET_SSE2"
+ "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_pshufd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_SHUFFLE))]
+ "TARGET_SSE2"
+ "pshufd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_pshuflw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_PSHUFLW))]
+ "TARGET_SSE2"
+ "pshuflw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_pshufhw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_PSHUFHW))]
+ "TARGET_SSE2"
+ "pshufhw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+;; MMX mask-generating comparisons
+
+(define_insn "eqv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (eq:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pcmpeqb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+(define_insn "eqv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (eq:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pcmpeqw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+(define_insn "eqv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (eq:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pcmpeqd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+(define_insn "gtv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (gt:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pcmpgtb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+(define_insn "gtv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (gt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pcmpgtw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+(define_insn "gtv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (gt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pcmpgtd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+
+;; MMX max/min insns
+
+(define_insn "umaxv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (umax:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pmaxub\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "smaxv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (smax:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pmaxsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "uminv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (umin:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pminub\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sminv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (smin:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pminsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "mode" "TI")])
+
+
+;; MMX shifts
+
+(define_insn "ashrv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
+ "TARGET_SSE2"
+ "psraw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashrv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
+ "TARGET_SSE2"
+ "psrad\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "lshrv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
+ "TARGET_SSE2"
+ "psrlw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "lshrv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
+ "TARGET_SSE2"
+ "psrld\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "lshrv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
+ "TARGET_SSE2"
+ "psrlq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashlv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
+ "TARGET_SSE2"
+ "psllw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashlv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
+ "TARGET_SSE2"
+ "pslld\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashlv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
+ "TARGET_SSE2"
+ "psllq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashrv8hi3_ti"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+ "TARGET_SSE2"
+ "psraw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashrv4si3_ti"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+ "TARGET_SSE2"
+ "psrad\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "lshrv8hi3_ti"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+ "TARGET_SSE2"
+ "psrlw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "lshrv4si3_ti"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+ "TARGET_SSE2"
+ "psrld\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "lshrv2di3_ti"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+ "TARGET_SSE2"
+ "psrlq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashlv8hi3_ti"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+ "TARGET_SSE2"
+ "psllw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashlv4si3_ti"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+ "TARGET_SSE2"
+ "pslld\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashlv2di3_ti"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
+ "TARGET_SSE2"
+ "psllq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+;; See logical MMX insns for the reason for the unspec. Strictly speaking
+;; we wouldn't need here it since we never generate TImode arithmetic.
+
+;; There has to be some kind of prize for the weirdest new instruction...
+(define_insn "sse2_ashlti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI
+ [(ashift:TI (match_operand:TI 1 "register_operand" "0")
+ (mult:SI (match_operand:SI 2 "immediate_operand" "i")
+ (const_int 8)))] UNSPEC_NOP))]
+ "TARGET_SSE2"
+ "pslldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_lshrti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI
+ [(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ (mult:SI (match_operand:SI 2 "immediate_operand" "i")
+ (const_int 8)))] UNSPEC_NOP))]
+ "TARGET_SSE2"
+ "psrldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+;; SSE unpack
+
+(define_insn "sse2_unpckhpd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (vec_select:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (parallel [(const_int 1)]))
+ (vec_select:V2DF (match_operand:V2DF 2 "register_operand" "x")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "unpckhpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_unpcklpd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (vec_select:V2DF (match_operand:V2DF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:V2DF (match_operand:V2DF 2 "register_operand" "x")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2"
+ "unpcklpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+;; MMX pack/unpack insns.
+
+(define_insn "sse2_packsswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (ss_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
+ (ss_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
+ "TARGET_SSE2"
+ "packsswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packssdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (ss_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0"))
+ (ss_truncate:V4HI (match_operand:V4SI 2 "register_operand" "x"))))]
+ "TARGET_SSE2"
+ "packssdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packuswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (us_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
+ (us_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
+ "TARGET_SSE2"
+ "packuswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_merge:V16QI
+ (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (parallel [(const_int 8) (const_int 0)
+ (const_int 9) (const_int 1)
+ (const_int 10) (const_int 2)
+ (const_int 11) (const_int 3)
+ (const_int 12) (const_int 4)
+ (const_int 13) (const_int 5)
+ (const_int 14) (const_int 6)
+ (const_int 15) (const_int 7)]))
+ (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)]))
+ (const_int 21845)))]
+ "TARGET_SSE2"
+ "punpckhbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 4) (const_int 0)
+ (const_int 5) (const_int 1)
+ (const_int 6) (const_int 2)
+ (const_int 7) (const_int 3)]))
+ (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)
+ (const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)]))
+ (const_int 85)))]
+ "TARGET_SSE2"
+ "punpckhwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhdq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_merge:V4SI
+ (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 2) (const_int 0)
+ (const_int 3) (const_int 1)]))
+ (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 1) (const_int 3)]))
+ (const_int 5)))]
+ "TARGET_SSE2"
+ "punpckhdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_merge:V16QI
+ (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)]))
+ (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
+ (parallel [(const_int 8) (const_int 0)
+ (const_int 9) (const_int 1)
+ (const_int 10) (const_int 2)
+ (const_int 11) (const_int 3)
+ (const_int 12) (const_int 4)
+ (const_int 13) (const_int 5)
+ (const_int 14) (const_int 6)
+ (const_int 15) (const_int 7)]))
+ (const_int 21845)))]
+ "TARGET_SSE2"
+ "punpcklbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)
+ (const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)]))
+ (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
+ (parallel [(const_int 4) (const_int 0)
+ (const_int 5) (const_int 1)
+ (const_int 6) (const_int 2)
+ (const_int 7) (const_int 3)]))
+ (const_int 85)))]
+ "TARGET_SSE2"
+ "punpcklwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckldq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_merge:V4SI
+ (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 1) (const_int 3)]))
+ (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
+ (parallel [(const_int 2) (const_int 0)
+ (const_int 3) (const_int 1)]))
+ (const_int 5)))]
+ "TARGET_SSE2"
+ "punpckldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (match_operand:V2DI 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "punpcklqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x")
+ (parallel [(const_int 1)
+ (const_int 0)]))
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "punpckhqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+;; SSE2 moves
+
+(define_insn "sse2_movapd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVA))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movapd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_movupd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movupd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_movdqa"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVA))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movdqa\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movdqu"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movdq2q"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
+ (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && !TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movdq2q_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r")
+ (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movq2dq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,?x")
+ (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
+ (const_int 0)))]
+ "TARGET_SSE2 && !TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt,ssemov")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movq2dq_rex64"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x")
+ (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r")
+ (const_int 0)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt,ssemov,ssecvt")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_concat:V2DI (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (const_int 0)))]
+ "TARGET_SSE2"
+ "movq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_loadd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr"))
+ (const_vector:V4SI [(const_int 0)
+ (const_int 0)
+ (const_int 0)
+ (const_int 0)])
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_stored"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mr")
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2"
+ "movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ (vec_merge:V2DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "0,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "m,x")
+ (const_int 2)))]
+ "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+ "movhpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_movlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
+ (vec_merge:V2DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "0,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "m,x")
+ (const_int 1)))]
+ "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+ "movlpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "sse2_loadsd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" "")]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1],
+ CONST0_RTX (V2DFmode)));
+ DONE;
+})
+
+(define_insn "sse2_loadsd_1"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m"))
+ (match_operand:V2DF 2 "const0_operand" "X")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "movsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_movsd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "movsd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_storesd"
+ [(set (match_operand:DF 0 "memory_operand" "=m")
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2"
+ "movsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_shufpd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_SHUFFLE))]
+ "TARGET_SSE2"
+ ;; @@@ check operand order for intel/nonintel syntax
+ "shufpd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_clflush"
+ [(unspec_volatile [(match_operand 0 "address_operand" "p")]
+ UNSPECV_CLFLUSH)]
+ "TARGET_SSE2"
+ "clflush %0"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
+(define_expand "sse2_mfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*mfence_insn"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ "TARGET_SSE2"
+ "mfence"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
+(define_expand "sse2_lfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*lfence_insn"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+ "TARGET_SSE2"
+ "lfence"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
OpenPOWER on IntegriCloud