diff options
author | obrien <obrien@FreeBSD.org> | 2002-02-06 05:02:18 +0000 |
---|---|---|
committer | obrien <obrien@FreeBSD.org> | 2002-02-06 05:02:18 +0000 |
commit | 6687f51a11fc12815af111810dec2fcd8576307c (patch) | |
tree | 35200411c20ffc6c75fff8dced81f1f00ce69c96 /contrib/gcc/config | |
parent | a33632ba5a0d130d82627f88736619b44d682df9 (diff) | |
download | FreeBSD-src-6687f51a11fc12815af111810dec2fcd8576307c.zip FreeBSD-src-6687f51a11fc12815af111810dec2fcd8576307c.tar.gz |
Use the stock [3.l-snap] version of this.
Diffstat (limited to 'contrib/gcc/config')
-rw-r--r-- | contrib/gcc/config/i386/i386.md | 24530 |
1 files changed, 17990 insertions, 6540 deletions
diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md index acc8b5e..0c0cf5f 100644 --- a/contrib/gcc/config/i386/i386.md +++ b/contrib/gcc/config/i386/i386.md @@ -1,37 +1,38 @@ -; GCC machine description for Intel X86. -;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000 Free Software -;; Foundation, Inc. +;; GCC machine description for IA-32 and x86-64. +;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +;; Free Software Foundation, Inc. ;; Mostly by William Schelter. - +;; x86_64 support added by Jan Hubicka +;; ;; This file is part of GNU CC. - +;; ;; GNU CC is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. - +;; ;; GNU CC is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. - +;; ;; You should have received a copy of the GNU General Public License ;; along with GNU CC; see the file COPYING. If not, write to ;; the Free Software Foundation, 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. */ - +;; Boston, MA 02111-1307, USA. */ +;; ;; The original PO technology requires these to be ordered by speed, ;; so that assigner will pick the fastest. - +;; ;; See file "rtl.def" for documentation on define_insn, match_*, et. al. - +;; ;; Macro #define NOTICE_UPDATE_CC in file i386.h handles condition code ;; updates for most instructions. - +;; ;; Macro REG_CLASS_FROM_LETTER in file i386.h defines the register ;; constraint letters. 
- -;; the special asm out single letter directives following a '%' are: +;; +;; The special asm out single letter directives following a '%' are: ;; 'z' mov%z1 would be movl, movw, or movb depending on the mode of ;; operands[1]. ;; 'L' Print the opcode suffix for a 32-bit integer opcode. @@ -41,14 +42,14 @@ ;; 'S' Print the opcode suffix for a 32-bit float opcode. ;; 'T' Print the opcode suffix for an 80-bit extended real XFmode float opcode. ;; 'J' Print the appropriate jump operand. - +;; ;; 'b' Print the QImode name of the register for the indicated operand. ;; %b0 would print %al if operands[0] is reg 0. ;; 'w' Likewise, print the HImode name of the register. ;; 'k' Likewise, print the SImode name of the register. ;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh. ;; 'y' Print "st(0)" instead of "st" as a register. - +;; ;; UNSPEC usage: ;; 0 This is a `scas' operation. The mode of the UNSPEC is always SImode. ;; operand 0 is the memory address to scan. @@ -68,1996 +69,3715 @@ ;; 6 This is the @GOT offset of a PIC address. ;; 7 This is the @GOTOFF offset of a PIC address. ;; 8 This is a reference to a symbol's @PLT address. +;; 9 This is an `fnstsw' operation. +;; 10 This is a `sahf' operation. +;; 11 This is a `fstcw' operation +;; 12 This is behaviour of add when setting carry flag. +;; 13 This is a `eh_return' placeholder. + +;; For SSE/MMX support: +;; 30 This is `fix', guaranteed to be truncating. +;; 31 This is a `emms' operation. +;; 32 This is a `maskmov' operation. +;; 33 This is a `movmsk' operation. +;; 34 This is a `non-temporal' move. +;; 36 This is used to distinguish COMISS from UCOMISS. +;; 37 This is a `ldmxcsr' operation. +;; 38 This is a forced `movaps' instruction (rather than whatever movti does) +;; 39 This is a forced `movups' instruction (rather than whatever movti does) +;; 40 This is a `stmxcsr' operation. +;; 41 This is a `shuffle' operation. +;; 42 This is a `rcp' operation. 
+;; 43 This is a `rsqsrt' operation. +;; 44 This is a `sfence' operation. +;; 45 This is a noop to prevent excessive combiner cleverness. +;; 46 This is a `femms' operation. +;; 49 This is a 'pavgusb' operation. +;; 50 This is a `pfrcp' operation. +;; 51 This is a `pfrcpit1' operation. +;; 52 This is a `pfrcpit2' operation. +;; 53 This is a `pfrsqrt' operation. +;; 54 This is a `pfrsqrit1' operation. + +;; Insns whose names begin with "x86_" are emitted by gen_FOO calls +;; from i386.c. + +;; In C guard expressions, put expressions which may be compile-time +;; constants first. This allows for better optimization. For +;; example, write "TARGET_64BIT && reload_completed", not +;; "reload_completed && TARGET_64BIT". + -;; This shadows the processor_type enumeration, so changes must be made -;; to i386.h at the same time. +;; Processor type. This attribute must exactly match the processor_type +;; enumeration in i386.h. +(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4" + (const (symbol_ref "ix86_cpu"))) ;; $FreeBSD$ +;; A basic instruction type. Refinements due to arguments to be +;; provided in other attributes. (define_attr "type" - "integer,binary,memory,test,compare,fcompare,idiv,imul,lea,fld,fpop,fpdiv,fpmul" - (const_string "integer")) - -(define_attr "memory" "none,load,store" - (cond [(eq_attr "type" "idiv,lea") - (const_string "none") + "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx,fistp" + (const_string "other")) + +;; Main data type used by the insn +(define_attr "mode" "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI" + (const_string "unknown")) + +;; Set for i387 operations. +(define_attr "i387" "" + (if_then_else (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp") + (const_int 1) + (const_int 0))) + +;; The (bounding maximum) length of an instruction immediate. 
+(define_attr "length_immediate" "" + (cond [(eq_attr "type" "incdec,setcc,icmov,ibr,str,cld,lea,other,multi,idiv,sse,mmx") + (const_int 0) + (eq_attr "i387" "1") + (const_int 0) + (eq_attr "type" "alu1,negnot,alu,icmp,imovx,ishift,imul,push,pop") + (symbol_ref "ix86_attr_length_immediate_default(insn,1)") + (eq_attr "type" "imov,test") + (symbol_ref "ix86_attr_length_immediate_default(insn,0)") + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand" "") + (const_int 4) + (const_int 0)) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand" "") + (const_int 4) + (const_int 0)) + (eq_attr "type" "ibr") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -128)) + (lt (minus (match_dup 0) (pc)) + (const_int 124))) + (const_int 1) + (const_int 4)) + ] + (symbol_ref "/* Update immediate_length and other attributes! */ abort(),1"))) + +;; The (bounding maximum) length of an instruction address. +(define_attr "length_address" "" + (cond [(eq_attr "type" "str,cld,other,multi,fxch") + (const_int 0) + (and (eq_attr "type" "call") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) + ] + (symbol_ref "ix86_attr_length_address_default (insn)"))) + +;; Set when length prefix is used. +(define_attr "prefix_data16" "" + (if_then_else (eq_attr "mode" "HI") + (const_int 1) + (const_int 0))) + +;; Set when string REP prefix is used. +(define_attr "prefix_rep" "" (const_int 0)) + +;; Set when 0f opcode prefix is used. +(define_attr "prefix_0f" "" + (if_then_else (eq_attr "type" "imovx,setcc,icmov,sse,mmx") + (const_int 1) + (const_int 0))) + +;; Set when modrm byte is used. 
+(define_attr "modrm" "" + (cond [(eq_attr "type" "str,cld") + (const_int 0) + (eq_attr "i387" "1") + (const_int 0) + (and (eq_attr "type" "incdec") + (ior (match_operand:SI 1 "register_operand" "") + (match_operand:HI 1 "register_operand" ""))) + (const_int 0) + (and (eq_attr "type" "push") + (not (match_operand 1 "memory_operand" ""))) + (const_int 0) + (and (eq_attr "type" "pop") + (not (match_operand 0 "memory_operand" ""))) + (const_int 0) + (and (eq_attr "type" "imov") + (and (match_operand 0 "register_operand" "") + (match_operand 1 "immediate_operand" ""))) + (const_int 0) + ] + (const_int 1))) + +;; The (bounding maximum) length of an instruction in bytes. +;; ??? fistp is in fact fldcw/fistp/fldcw sequence. Later we may want +;; to split it and compute proper length as for other insns. +(define_attr "length" "" + (cond [(eq_attr "type" "other,multi,fistp") + (const_int 16) + ] + (plus (plus (attr "modrm") + (plus (attr "prefix_0f") + (plus (attr "i387") + (const_int 1)))) + (plus (attr "prefix_rep") + (plus (attr "prefix_data16") + (plus (attr "length_immediate") + (attr "length_address"))))))) + +;; The `memory' attribute is `none' if no memory is referenced, `load' or +;; `store' if there is a simple memory reference therein, or `unknown' +;; if the instruction is complex. 
+ +(define_attr "memory" "none,load,store,both,unknown" + (cond [(eq_attr "type" "other,multi,str") + (const_string "unknown") + (eq_attr "type" "lea,fcmov,fpspc,cld") + (const_string "none") + (eq_attr "type" "fistp") + (const_string "both") + (eq_attr "type" "push") + (if_then_else (match_operand 1 "memory_operand" "") + (const_string "both") + (const_string "store")) + (eq_attr "type" "pop,setcc") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "both") + (const_string "load")) + (eq_attr "type" "icmp,test") + (if_then_else (ior (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")) + (const_string "load") + (const_string "none")) + (eq_attr "type" "ibr") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "load") + (const_string "none")) + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand" "") + (const_string "none") + (const_string "load")) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand" "") + (const_string "none") + (const_string "load")) + (and (eq_attr "type" "alu1,negnot") + (match_operand 1 "memory_operand" "")) + (const_string "both") + (and (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")) + (const_string "both") + (match_operand 0 "memory_operand" "") + (const_string "store") + (match_operand 1 "memory_operand" "") + (const_string "load") + (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx") + (match_operand 2 "memory_operand" "")) + (const_string "load") + (and (eq_attr "type" "icmov") + (match_operand 3 "memory_operand" "")) + (const_string "load") + ] + (const_string "none"))) - (eq_attr "type" "fld") - (const_string "load") +;; Indicates if an instruction has both an immediate and a displacement. 
+ +(define_attr "imm_disp" "false,true,unknown" + (cond [(eq_attr "type" "other,multi") + (const_string "unknown") + (and (eq_attr "type" "icmp,test,imov") + (and (match_operand 0 "memory_displacement_operand" "") + (match_operand 1 "immediate_operand" ""))) + (const_string "true") + (and (eq_attr "type" "alu,ishift,imul,idiv") + (and (match_operand 0 "memory_displacement_operand" "") + (match_operand 2 "immediate_operand" ""))) + (const_string "true") + ] + (const_string "false"))) + +;; Indicates if an FP operation has an integer source. + +(define_attr "fp_int_src" "false,true" + (const_string "false")) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "length" "128") + (set_attr "type" "multi")]) + +;; Pentium Scheduling +;; +;; The Pentium is an in-order core with two integer pipelines. + +;; True for insns that behave like prefixed insns on the Pentium. +(define_attr "pent_prefix" "false,true" + (if_then_else (ior (eq_attr "prefix_0f" "1") + (ior (eq_attr "prefix_data16" "1") + (eq_attr "prefix_rep" "1"))) + (const_string "true") + (const_string "false"))) + +;; Categorize how an instruction slots. + +;; The non-MMX Pentium slots an instruction with prefixes on U pipe only, +;; while MMX Pentium can slot it on either U or V. Model non-MMX Pentium +;; rules, because it results in noticeably better code on non-MMX Pentium +;; and doesn't hurt much on MMX. (Prefixed instructions are not very +;; common, so the scheduler usualy has a non-prefixed insn to pair). 
+ +(define_attr "pent_pair" "uv,pu,pv,np" + (cond [(eq_attr "imm_disp" "true") + (const_string "np") + (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec") + (and (eq_attr "type" "pop,push") + (eq_attr "memory" "!both"))) + (if_then_else (eq_attr "pent_prefix" "true") + (const_string "pu") + (const_string "uv")) + (eq_attr "type" "ibr") + (const_string "pv") + (and (eq_attr "type" "ishift") + (match_operand 2 "const_int_operand" "")) + (const_string "pu") + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_string "pv") + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_string "pv") + ] + (const_string "np"))) + +;; Rough readiness numbers. Fine tuning happens in i386.c. +;; +;; u describes pipe U +;; v describes pipe V +;; uv describes either pipe U or V for those that can issue to either +;; np describes not paring +;; fpu describes fpu +;; fpm describes fp insns of different types are not pipelined. +;; +;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. - (eq_attr "type" "test") - (if_then_else (match_operand 0 "memory_operand" "") - (const_string "load") - (const_string "none")) +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "imul")) + 11 11) - (eq_attr "type" "compare,fcompare") - (if_then_else (ior (match_operand 0 "memory_operand" "") - (match_operand 1 "memory_operand" "")) - (const_string "load") - (const_string "none")) +(define_function_unit "pent_mul" 1 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "imul")) + 11 11) - (and (eq_attr "type" "integer,memory,fpop") - (match_operand 0 "memory_operand" "")) - (const_string "store") +;; Rep movs takes minimally 12 cycles. +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "str")) + 12 12) + +; ??? 
IDIV for SI takes 46 cycles, for HI 30, for QI 22 +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "idiv")) + 46 46) + +; Fp reg-reg moves takes 1 cycle. Loads takes 1 cycle for SF/DF mode, +; 3 cycles for XFmode. Stores takes 2 cycles for SF/DF and 3 for XF. +; fldz and fld1 takes 2 cycles. Only reg-reg moves are pairable. +; The integer <-> fp conversion is not modeled correctly. Fild behaves +; like normal fp operation and fist takes 6 cycles. + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load,store") + (eq_attr "mode" "XF")))) + 3 3) + +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load,store") + (eq_attr "mode" "XF")))) + 3 3) + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (ior (match_operand 1 "immediate_operand" "") + (eq_attr "memory" "store")))) + 2 2) - (and (eq_attr "type" "integer,memory,fpop") - (match_operand 1 "memory_operand" "")) - (const_string "load") +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (ior (match_operand 1 "immediate_operand" "") + (eq_attr "memory" "store")))) + 2 2) - (and (eq_attr "type" "binary,imul,fpmul,fpdiv") - (ior (match_operand 1 "memory_operand" "") - (match_operand 2 "memory_operand" ""))) - (const_string "load")] +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "cld")) + 2 2) - (const_string "none"))) +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none,load"))) + 1 1) + +; Read/Modify/Write instructions usually take 3 cycles. 
+(define_function_unit "pent_u" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "alu,alu1,ishift") + (and (eq_attr "pent_pair" "pu") + (eq_attr "memory" "both")))) + 3 3) + +(define_function_unit "pent_uv" 2 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "alu,alu1,ishift") + (and (eq_attr "pent_pair" "!np") + (eq_attr "memory" "both")))) + 3 3) + +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "alu,alu1,negnot,ishift") + (and (eq_attr "pent_pair" "np") + (eq_attr "memory" "both")))) + 3 3) + +; Read/Modify or Modify/Write instructions usually take 2 cycles. +(define_function_unit "pent_u" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "alu,ishift") + (and (eq_attr "pent_pair" "pu") + (eq_attr "memory" "load,store")))) + 2 2) -;; Functional units +(define_function_unit "pent_uv" 2 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "alu,ishift") + (and (eq_attr "pent_pair" "!np") + (eq_attr "memory" "load,store")))) + 2 2) -; (define_function_unit NAME MULTIPLICITY SIMULTANEITY -; TEST READY-DELAY ISSUE-DELAY [CONFLICT-LIST]) +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "alu,ishift") + (and (eq_attr "pent_pair" "np") + (eq_attr "memory" "load,store")))) + 2 2) -; pentiumpro has a reservation station with 5 ports -; port 0 has integer, float add, integer divide, float divide, float -; multiply, and shifter units. -; port 1 has integer, and jump units. -; port 2 has the load address generation unit -; ports 3 and 4 have the store address generation units +; Insns w/o memory operands and move instructions usually take one cycle. 
+(define_function_unit "pent_u" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "pent_pair" "pu")) + 1 1) + +(define_function_unit "pent_v" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "pent_pair" "pv")) + 1 1) + +(define_function_unit "pent_uv" 2 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "pent_pair" "!np")) + 1 1) + +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "pent_pair" "np")) + 1 1) + +; Pairable insns only conflict with other non-pairable insns. +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "alu,alu1,ishift") + (and (eq_attr "pent_pair" "!np") + (eq_attr "memory" "both")))) + 3 3 + [(eq_attr "pent_pair" "np")]) + +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (and (eq_attr "type" "alu,alu1,ishift") + (and (eq_attr "pent_pair" "!np") + (eq_attr "memory" "load,store")))) + 2 2 + [(eq_attr "pent_pair" "np")]) + +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "pent_pair" "!np")) + 1 1 + [(eq_attr "pent_pair" "np")]) + +; Floating point instructions usually blocks cycle longer when combined with +; integer instructions, because of the inpaired fxch instruction. +(define_function_unit "pent_np" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")) + 2 2 + [(eq_attr "type" "!fmov,fop,fop1,fsgn,fmul,fpspc,fcmov,fcmp,fistp")]) + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fcmp,fxch,fsgn")) + 1 1) + +; Addition takes 3 cycles; assume other random cruft does as well. +; ??? Trivial fp operations such as fabs or fchs takes only one cycle. +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fop,fop1,fistp")) + 3 1) + +; Multiplication takes 3 cycles and is only half pipelined. 
+(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fmul")) + 3 1) + +(define_function_unit "pent_mul" 1 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fmul")) + 2 2) -; pentium has two integer pipelines, the main u pipe and the secondary v pipe. -; and a float pipeline +; ??? This is correct only for fdiv and sqrt -- sin/cos take 65-100 cycles. +; They can overlap with integer insns. Only the last two cycles can overlap +; with other fp insns. Only fsin/fcos can overlap with multiplies. +; Only last two cycles of fsin/fcos can overlap with other instructions. +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fdiv")) + 39 37) + +(define_function_unit "pent_mul" 1 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fdiv")) + 39 39) + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fpspc")) + 70 68) + +(define_function_unit "pent_mul" 1 1 + (and (eq_attr "cpu" "pentium") + (eq_attr "type" "fpspc")) + 70 70) + +;; Pentium Pro/PII Scheduling +;; +;; The PPro has an out-of-order core, but the instruction decoders are +;; naturally in-order and asymmetric. We get best performance by scheduling +;; for the decoders, for in doing so we give the oo execution unit the +;; most choices. + +;; Categorize how many uops an ia32 instruction evaluates to: +;; one -- an instruction with 1 uop can be decoded by any of the +;; three decoders. +;; few -- an instruction with 1 to 4 uops can be decoded only by +;; decoder 0. +;; many -- a complex instruction may take an unspecified number of +;; cycles to decode in decoder 0. 
+ +(define_attr "ppro_uops" "one,few,many" + (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str") + (const_string "many") + (eq_attr "type" "icmov,fcmov,str,cld") + (const_string "few") + (eq_attr "type" "imov") + (if_then_else (eq_attr "memory" "store,both") + (const_string "few") + (const_string "one")) + (eq_attr "memory" "!none") + (const_string "few") + ] + (const_string "one"))) + +;; Rough readiness numbers. Fine tuning happens in i386.c. +;; +;; p0 describes port 0. +;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either. +;; p2 describes port 2 for loads. +;; p34 describes ports 3 and 4 for stores. +;; fpu describes the fpu accessed via port 0. +;; ??? It is less than clear if there are separate fadd and fmul units +;; that could operate in parallel. +;; +;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "ishift,lea,ibr,cld")) + 1 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "imul")) + 4 1) + +;; ??? Does the divider lock out the pipe while it works, +;; or is there a disconnected unit? 
+(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "idiv")) + 17 17) -;; Floating point +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fop,fop1,fsgn,fistp")) + 3 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fcmov")) + 2 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fcmp")) + 1 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fmov")) + 1 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fmul")) + 5 1) + +(define_function_unit "ppro_p0" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fdiv,fpspc")) + 56 1) + +(define_function_unit "ppro_p01" 2 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "!imov,fmov")) + 1 1) + +(define_function_unit "ppro_p01" 2 0 + (and (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "imov,fmov")) + (eq_attr "memory" "none")) + 1 1) + +(define_function_unit "ppro_p2" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (ior (eq_attr "type" "pop") + (eq_attr "memory" "load,both"))) + 3 1) + +(define_function_unit "ppro_p34" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (ior (eq_attr "type" "push") + (eq_attr "memory" "store,both"))) + 1 1) + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fop,fop1,fsgn,fmov,fcmp,fcmov,fistp")) + 1 1) + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fmul")) + 5 2) + +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "fdiv,fpspc")) + 56 56) + +;; imul uses the fpu. ??? does it have the same throughput as fmul? +(define_function_unit "fpu" 1 0 + (and (eq_attr "cpu" "pentiumpro") + (eq_attr "type" "imul")) + 4 1) + +;; AMD K6/K6-2 Scheduling +;; +;; The K6 has similar architecture to PPro. 
Important difference is, that +;; there are only two decoders and they seems to be much slower than execution +;; units. So we have to pay much more attention to proper decoding for +;; schedulers. We share most of scheduler code for PPro in i386.c +;; +;; The fp unit is not pipelined and do one operation per two cycles including +;; the FXCH. +;; +;; alu describes both ALU units (ALU-X and ALU-Y). +;; alux describes X alu unit +;; fpu describes FPU unit +;; load describes load unit. +;; branch describes branch unit. +;; store decsribes store unit. This unit is not modelled completely and only +;; used to model lea operation. Otherwise it lie outside of the critical +;; path. +;; +;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real. -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpop,fcompare") (eq_attr "cpu" "i386,i486")) - 5 5) +;; The decoder specification is in the PPro section above! -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpop,fcompare") (eq_attr "cpu" "pentium,pentiumpro")) - 3 0) +;; Shift instructions and certain arithmetic are issued only to X pipe. +(define_function_unit "k6_alux" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "ishift,alu1,negnot,cld")) + 1 1) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpmul") (eq_attr "cpu" "pentium")) - 7 0) +;; The QI mode arithmetic is issued to X pipe only. 
+(define_function_unit "k6_alux" 1 0 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec") + (match_operand:QI 0 "general_operand" ""))) + 1 1) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpmul") (eq_attr "cpu" "pentiumpro")) - 5 0) +(define_function_unit "k6_alu" 2 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "ishift,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea")) + 1 1) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "idiv") (eq_attr "cpu" "pentiumpro")) - 10 10) +(define_function_unit "k6_alu" 2 0 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "imov") + (eq_attr "memory" "none"))) + 1 1) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "imul") (eq_attr "cpu" "pentiumpro")) - 6 0) +(define_function_unit "k6_branch" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "call,callv,ibr")) + 1 1) -(define_function_unit "fp" 1 0 - (eq_attr "type" "fpdiv") - 10 10) +;; Load unit have two cycle latency, but we take care for it in adjust_cost +(define_function_unit "k6_load" 1 0 + (and (eq_attr "cpu" "k6") + (ior (eq_attr "type" "pop") + (eq_attr "memory" "load,both"))) + 1 1) -(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fld") (eq_attr "cpu" "!pentiumpro,k6")) - 1 0) +(define_function_unit "k6_load" 1 0 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "str") + (eq_attr "memory" "load,both"))) + 10 10) -;; K6 FPU is not pipelined. 
-(define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpop,fpmul,fcompare") (eq_attr "cpu" "k6")) - 2 2) +;; Lea have two instructions, so latency is probably 2 +(define_function_unit "k6_store" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "lea")) + 2 1) -;; i386 and i486 have one integer unit, which need not be modeled +(define_function_unit "k6_store" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "str")) + 10 10) -(define_function_unit "integer" 2 0 - (and (eq_attr "type" "integer,binary,test,compare,lea") (eq_attr "cpu" "pentium,pentiumpro")) - 1 0) +(define_function_unit "k6_store" 1 0 + (and (eq_attr "cpu" "k6") + (ior (eq_attr "type" "push") + (eq_attr "memory" "store,both"))) + 1 1) -(define_function_unit "integer" 2 0 +(define_function_unit "k6_fpu" 1 1 (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "integer,binary,test,compare") - (eq_attr "memory" "!load"))) - 1 0) + (eq_attr "type" "fop,fop1,fmov,fcmp,fistp")) + 2 2) -;; Internally, K6 converts REG OP MEM instructions into a load (2 cycles) -;; and a register operation (1 cycle). -(define_function_unit "integer" 2 0 +(define_function_unit "k6_fpu" 1 1 (and (eq_attr "cpu" "k6") - (and (eq_attr "type" "integer,binary,test,compare") - (eq_attr "memory" "load"))) - 3 0) + (eq_attr "type" "fmul")) + 2 2) -;; Multiplies use one of the integer units -(define_function_unit "integer" 2 0 - (and (eq_attr "cpu" "pentium") (eq_attr "type" "imul")) - 11 11) +;; ??? 
Guess +(define_function_unit "k6_fpu" 1 1 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "fdiv,fpspc")) + 56 56) -(define_function_unit "integer" 2 0 - (and (eq_attr "cpu" "k6") (eq_attr "type" "imul")) +(define_function_unit "k6_alu" 2 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "imul")) 2 2) -(define_function_unit "integer" 2 0 - (and (eq_attr "cpu" "pentium") (eq_attr "type" "idiv")) - 25 25) +(define_function_unit "k6_alux" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "imul")) + 2 2) -(define_function_unit "integer" 2 0 - (and (eq_attr "cpu" "k6") (eq_attr "type" "idiv")) +;; ??? Guess +(define_function_unit "k6_alu" 2 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "idiv")) 17 17) -;; Pentium Pro and K6 have a separate load unit. -(define_function_unit "load" 1 0 - (and (eq_attr "cpu" "pentiumpro") (eq_attr "memory" "load")) - 3 0) - -(define_function_unit "load" 1 0 - (and (eq_attr "cpu" "k6") (eq_attr "memory" "load")) - 2 0) - -;; Pentium Pro and K6 have a separate store unit. -(define_function_unit "store" 1 0 - (and (eq_attr "cpu" "pentiumpro,k6") (eq_attr "memory" "store")) - 1 0) - -;; lea executes in the K6 store unit with 1 cycle latency -(define_function_unit "store" 1 0 - (and (eq_attr "cpu" "k6") (eq_attr "type" "lea")) - 1 0) - +(define_function_unit "k6_alux" 1 0 + (and (eq_attr "cpu" "k6") + (eq_attr "type" "idiv")) + 17 17) -;; "movl MEM,REG / testl REG,REG" is faster on a 486 than "cmpl $0,MEM". -;; But restricting MEM here would mean that gcc could not remove a redundant -;; test in cases like "incl MEM / je TARGET". +;; AMD Athlon Scheduling ;; -;; We don't want to allow a constant operand for test insns because -;; (set (cc0) (const_int foo)) has no mode information. Such insns will -;; be folded while optimizing anyway. - -;; All test insns have expanders that save the operands away without -;; actually generating RTL. The bCOND or sCOND (emitted immediately -;; after the tstM or cmp) will actually emit the tstM or cmpM. 
- -;; Processor type -- this attribute must exactly match the processor_type -;; enumeration in i386.h. - -(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6" - (const (symbol_ref "ix86_cpu"))) - -(define_insn "tstsi_1" - [(set (cc0) - (match_operand:SI 0 "nonimmediate_operand" "rm"))] - "" - "* -{ - if (REG_P (operands[0])) - return AS2 (test%L0,%0,%0); - - operands[1] = const0_rtx; - return AS2 (cmp%L0,%1,%0); -}" - [(set_attr "type" "test")]) - -(define_expand "tstsi" - [(set (cc0) - (match_operand:SI 0 "nonimmediate_operand" ""))] - "" - " -{ - i386_compare_gen = gen_tstsi_1; - i386_compare_op0 = operands[0]; - i386_compare_op1 = const0_rtx; - DONE; -}") - -(define_insn "tsthi_1" - [(set (cc0) - (match_operand:HI 0 "nonimmediate_operand" "rm"))] - "" - "* -{ - if (REG_P (operands[0])) - return AS2 (test%W0,%0,%0); +;; The Athlon does contain three pipelined FP units, three integer units and +;; three address generation units. +;; +;; The predecode logic is determining boundaries of instructions in the 64 +;; byte cache line. So the cache line straddling problem of K6 might be issue +;; here as well, but it is not noted in the documentation. +;; +;; Three DirectPath instructions decoders and only one VectorPath decoder +;; is available. They can decode three DirectPath instructions or one VectorPath +;; instruction per cycle. +;; Decoded macro instructions are then passed to 72 entry instruction control +;; unit, that passes +;; it to the specialized integer (18 entry) and fp (36 entry) schedulers. +;; +;; The load/store queue unit is not attached to the schedulers but +;; communicates with all the execution units separately instead. 
+ +(define_attr "athlon_decode" "direct,vector" + (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov") + (const_string "vector") + (and (eq_attr "type" "push") + (match_operand 1 "memory_operand" "")) + (const_string "vector") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load,store") + (eq_attr "mode" "XF"))) + (const_string "vector")] + (const_string "direct"))) + +(define_function_unit "athlon_vectordec" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "vector")) + 1 1) + +(define_function_unit "athlon_directdec" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "direct")) + 1 1) + +(define_function_unit "athlon_vectordec" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_decode" "direct")) + 1 1 [(eq_attr "athlon_decode" "vector")]) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ibr,call,callv,icmov,cld,pop,setcc,push,pop")) + 1 1) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "str")) + 15 15) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "imul")) + 5 0) + +(define_function_unit "athlon_ieu" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "idiv")) + 42 0) + +(define_function_unit "athlon_muldiv" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "imul")) + 5 0) + +(define_function_unit "athlon_muldiv" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "idiv")) + 42 42) + +(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any" + (cond [(eq_attr "type" "fop,fop1,fcmp,fistp") + (const_string "add") + (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov") + (const_string "mul") + (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both")) + (const_string "store") + (and (eq_attr "type" "fmov") (eq_attr "memory" "load")) + (const_string "any") + (and (eq_attr "type" "fmov") + (ior 
(match_operand:SI 1 "register_operand" "") + (match_operand 1 "immediate_operand" ""))) + (const_string "store") + (eq_attr "type" "fmov") + (const_string "muladd")] + (const_string "none"))) - operands[1] = const0_rtx; - return AS2 (cmp%W0,%1,%0); -}" - [(set_attr "type" "test")]) +;; We use latencies 1 for definitions. This is OK to model colisions +;; in execution units. The real latencies are modeled in the "fp" pipeline. + +;; fsin, fcos: 96-192 +;; fsincos: 107-211 +;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode. +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fpspc")) + 100 1) + +;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode. +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fdiv")) + 24 1) + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fop,fop1,fmul,fistp")) + 4 1) + +;; XFmode loads are slow. +;; XFmode store is slow too (8 cycles), but we don't need to model it, because +;; there are no dependent instructions. + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fmov") + (and (eq_attr "memory" "load") + (eq_attr "mode" "XF")))) + 10 1) + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fmov,fsgn")) + 2 1) + +;; fcmp and ftst instructions +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (eq_attr "athlon_decode" "direct"))) + 3 1) + +;; fcmpi instructions. 
+(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (and (eq_attr "type" "fcmp") + (eq_attr "athlon_decode" "vector"))) + 3 1) + +(define_function_unit "athlon_fp" 3 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "type" "fcmov")) + 7 1) + +(define_function_unit "athlon_fp_mul" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "mul")) + 1 1) + +(define_function_unit "athlon_fp_add" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "add")) + 1 1) + +(define_function_unit "athlon_fp_muladd" 2 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "muladd,mul,add")) + 1 1) + +(define_function_unit "athlon_fp_store" 1 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "athlon_fpunits" "store")) + 1 1) + +;; We don't need to model the Address Generation Unit, since we don't model +;; the re-order buffer yet and thus we never schedule more than three operations +;; at time. Later we may want to experiment with MD_SCHED macros modeling the +;; decoders independently on the functional units. + +;(define_function_unit "athlon_agu" 3 0 +; (and (eq_attr "cpu" "athlon") +; (and (eq_attr "memory" "!none") +; (eq_attr "athlon_fpunits" "none"))) +; 1 1) + +;; Model load unit to avoid too long sequences of loads. We don't need to +;; model store queue, since it is hardly going to be bottleneck. + +(define_function_unit "athlon_load" 2 0 + (and (eq_attr "cpu" "athlon") + (eq_attr "memory" "load,both")) + 1 1) -(define_expand "tsthi" - [(set (cc0) - (match_operand:HI 0 "nonimmediate_operand" ""))] - "" - " -{ - i386_compare_gen = gen_tsthi_1; - i386_compare_op0 = operands[0]; - i386_compare_op1 = const0_rtx; - DONE; -}") - -(define_insn "tstqi_1" - [(set (cc0) - (match_operand:QI 0 "nonimmediate_operand" "qm"))] - "" - "* -{ - if (REG_P (operands[0])) - return AS2 (test%B0,%0,%0); + +;; Compare instructions. 
- operands[1] = const0_rtx; - return AS2 (cmp%B0,%1,%0); -}" - [(set_attr "type" "test")]) +;; All compare insns have expanders that save the operands away without +;; actually generating RTL. The bCOND or sCOND (emitted immediately +;; after the cmp) will actually emit the cmpM. -(define_expand "tstqi" - [(set (cc0) - (match_operand:QI 0 "nonimmediate_operand" ""))] +(define_expand "cmpdi" + [(set (reg:CC 17) + (compare:CC (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "x86_64_general_operand" "")))] "" - " -{ - i386_compare_gen = gen_tstqi_1; - i386_compare_op0 = operands[0]; - i386_compare_op1 = const0_rtx; - DONE; -}") - -(define_insn "tstsf_cc" - [(set (cc0) - (match_operand:SF 0 "register_operand" "f")) - (clobber (match_scratch:HI 1 "=a"))] - "TARGET_80387 && ! TARGET_IEEE_FP" - "* -{ - if (! STACK_TOP_P (operands[0])) - abort (); - - output_asm_insn (\"ftst\", operands); - - if (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG)) - output_asm_insn (AS1 (fstp,%y0), operands); - - return output_fp_cc0_set (insn); -}" - [(set_attr "type" "test")]) - -;; Don't generate tstsf if generating IEEE code, since the `ftst' opcode -;; isn't IEEE compliant. - -(define_expand "tstsf" - [(parallel [(set (cc0) - (match_operand:SF 0 "register_operand" "")) - (clobber (match_scratch:HI 1 ""))])] - "TARGET_80387 && ! TARGET_IEEE_FP" - " { - i386_compare_gen = gen_tstsf_cc; - i386_compare_op0 = operands[0]; - i386_compare_op1 = const0_rtx; - DONE; -}") - -(define_insn "tstdf_cc" - [(set (cc0) - (match_operand:DF 0 "register_operand" "f")) - (clobber (match_scratch:HI 1 "=a"))] - "TARGET_80387 && ! TARGET_IEEE_FP" - "* -{ - if (! 
STACK_TOP_P (operands[0])) - abort (); - - output_asm_insn (\"ftst\", operands); - - if (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG)) - output_asm_insn (AS1 (fstp,%y0), operands); - - return output_fp_cc0_set (insn); -}" - [(set_attr "type" "test")]) - -;; Don't generate tstdf if generating IEEE code, since the `ftst' opcode -;; isn't IEEE compliant. - -(define_expand "tstdf" - [(parallel [(set (cc0) - (match_operand:DF 0 "register_operand" "")) - (clobber (match_scratch:HI 1 ""))])] - "TARGET_80387 && ! TARGET_IEEE_FP" - " -{ - i386_compare_gen = gen_tstdf_cc; - i386_compare_op0 = operands[0]; - i386_compare_op1 = const0_rtx; - DONE; -}") - -(define_insn "tstxf_cc" - [(set (cc0) - (match_operand:XF 0 "register_operand" "f")) - (clobber (match_scratch:HI 1 "=a"))] - "TARGET_80387 && ! TARGET_IEEE_FP" - "* -{ - if (! STACK_TOP_P (operands[0])) - abort (); - - output_asm_insn (\"ftst\", operands); - - if (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG)) - output_asm_insn (AS1 (fstp,%y0), operands); - - return output_fp_cc0_set (insn); -}" - [(set_attr "type" "test")]) - -;; Don't generate tstxf if generating IEEE code, since the `ftst' opcode -;; isn't IEEE compliant. - -(define_expand "tstxf" - [(parallel [(set (cc0) - (match_operand:XF 0 "register_operand" "")) - (clobber (match_scratch:HI 1 ""))])] - "TARGET_80387 && ! TARGET_IEEE_FP" - " -{ - i386_compare_gen = gen_tstxf_cc; - i386_compare_op0 = operands[0]; - i386_compare_op1 = const0_rtx; + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[0] = force_reg (DImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; DONE; -}") - -;;- compare instructions. See comments above tstM patterns about -;; expansion of these insns. 
- -(define_insn "cmpsi_1" - [(set (cc0) - (compare (match_operand:SI 0 "nonimmediate_operand" "mr,r") - (match_operand:SI 1 "general_operand" "ri,mr")))] - "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" - "* return AS2 (cmp%L0,%1,%0);" - [(set_attr "type" "compare")]) +}) (define_expand "cmpsi" - [(set (cc0) - (compare (match_operand:SI 0 "nonimmediate_operand" "") - (match_operand:SI 1 "general_operand" "")))] + [(set (reg:CC 17) + (compare:CC (match_operand:SI 0 "cmpsi_operand" "") + (match_operand:SI 1 "general_operand" "")))] "" - " { if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[0] = force_reg (SImode, operands[0]); - - i386_compare_gen = gen_cmpsi_1; - i386_compare_op0 = operands[0]; - i386_compare_op1 = operands[1]; + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; DONE; -}") - -(define_insn "cmphi_1" - [(set (cc0) - (compare (match_operand:HI 0 "nonimmediate_operand" "mr,r") - (match_operand:HI 1 "general_operand" "ri,mr")))] - "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" - "* return AS2 (cmp%W0,%1,%0);" - [(set_attr "type" "compare")]) +}) (define_expand "cmphi" - [(set (cc0) - (compare (match_operand:HI 0 "nonimmediate_operand" "") - (match_operand:HI 1 "general_operand" "")))] + [(set (reg:CC 17) + (compare:CC (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" "")))] "" - " { if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[0] = force_reg (HImode, operands[0]); - - i386_compare_gen = gen_cmphi_1; - i386_compare_op0 = operands[0]; - i386_compare_op1 = operands[1]; + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; DONE; -}") - -(define_insn "cmpqi_1" - [(set (cc0) - (compare (match_operand:QI 0 "nonimmediate_operand" "q,mq") - (match_operand:QI 1 "general_operand" "qm,nq")))] - "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" - "* return AS2 (cmp%B0,%1,%0);" - [(set_attr 
"type" "compare")]) +}) (define_expand "cmpqi" - [(set (cc0) - (compare (match_operand:QI 0 "nonimmediate_operand" "") - (match_operand:QI 1 "general_operand" "")))] - "" - " + [(set (reg:CC 17) + (compare:CC (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" "")))] + "TARGET_QIMODE_MATH" { if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[0] = force_reg (QImode, operands[0]); - - i386_compare_gen = gen_cmpqi_1; - i386_compare_op0 = operands[0]; - i386_compare_op1 = operands[1]; + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; DONE; -}") +}) -;; These implement float point compares. For each of DFmode and -;; SFmode, there is the normal insn, and an insn where the second operand -;; is converted to the desired mode. +(define_insn "cmpdi_ccno_1_rex64" + [(set (reg 17) + (compare (match_operand:DI 0 "nonimmediate_operand" "r,?mr") + (match_operand:DI 1 "const0_operand" "n,n")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "@ + test{q}\t{%0, %0|%0, %0} + cmp{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "DI")]) + +(define_insn "*cmpdi_minus_1_rex64" + [(set (reg 17) + (compare (minus:DI (match_operand:DI 0 "nonimmediate_operand" "rm,r") + (match_operand:DI 1 "x86_64_general_operand" "re,mr")) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)" + "cmp{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "DI")]) + +(define_expand "cmpdi_1_rex64" + [(set (reg:CC 17) + (compare:CC (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" "")))] + "TARGET_64BIT" + "") -(define_insn "" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(match_operand:XF 0 "register_operand" "f") - (match_operand:XF 1 "register_operand" "f")])) - (clobber (match_scratch:HI 3 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - 
[(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(match_operand:XF 0 "register_operand" "f") - (float_extend:XF - (match_operand:DF 1 "nonimmediate_operand" "fm"))])) - (clobber (match_scratch:HI 3 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(float_extend:XF - (match_operand:DF 0 "nonimmediate_operand" "fm")) - (match_operand:XF 1 "register_operand" "f")])) - (clobber (match_scratch:HI 3 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(match_operand:XF 0 "register_operand" "f") - (float_extend:XF - (match_operand:SF 1 "nonimmediate_operand" "fm"))])) - (clobber (match_scratch:HI 3 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(float_extend:XF - (match_operand:SF 0 "nonimmediate_operand" "fm")) - (match_operand:XF 1 "register_operand" "f")])) - (clobber (match_scratch:HI 3 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (compare:CCFPEQ (match_operand:XF 0 "register_operand" "f") - (match_operand:XF 1 "register_operand" "f"))) - (clobber (match_scratch:HI 2 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(match_operand:DF 0 "nonimmediate_operand" "f,fm") - (match_operand:DF 1 "nonimmediate_operand" "fm,f")])) - (clobber (match_scratch:HI 3 "=a,a"))] - "TARGET_80387 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "* return 
output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(match_operand:DF 0 "register_operand" "f") - (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand" "fm"))])) - (clobber (match_scratch:HI 3 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(float_extend:DF - (match_operand:SF 0 "nonimmediate_operand" "fm")) - (match_operand:DF 1 "register_operand" "f")])) - (clobber (match_scratch:HI 3 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(float_extend:DF - (match_operand:SF 0 "register_operand" "f")) - (match_operand:DF 1 "nonimmediate_operand" "fm")])) - (clobber (match_scratch:HI 3 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (compare:CCFPEQ (match_operand:DF 0 "register_operand" "f") - (match_operand:DF 1 "register_operand" "f"))) - (clobber (match_scratch:HI 2 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) +(define_insn "cmpdi_1_insn_rex64" + [(set (reg 17) + (compare (match_operand:DI 0 "nonimmediate_operand" "mr,r") + (match_operand:DI 1 "x86_64_general_operand" "re,mr")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "DI")]) + + +(define_insn "*cmpsi_ccno_1" + [(set (reg 17) + (compare (match_operand:SI 0 "nonimmediate_operand" "r,?mr") + (match_operand:SI 1 "const0_operand" "n,n")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{l}\t{%0, %0|%0, %0} + cmp{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr 
"length_immediate" "0,1") + (set_attr "mode" "SI")]) + +(define_insn "*cmpsi_minus_1" + [(set (reg 17) + (compare (minus:SI (match_operand:SI 0 "nonimmediate_operand" "rm,r") + (match_operand:SI 1 "general_operand" "ri,mr")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "SI")]) + +(define_expand "cmpsi_1" + [(set (reg:CC 17) + (compare:CC (match_operand:SI 0 "nonimmediate_operand" "rm,r") + (match_operand:SI 1 "general_operand" "ri,mr")))] + "" + "") -;; These two insns will never be generated by combine due to the mode of -;; the COMPARE. -;(define_insn "" -; [(set (cc0) -; (compare:CCFPEQ (match_operand:DF 0 "register_operand" "f") -; (float_extend:DF -; (match_operand:SF 1 "register_operand" "f")))) -; (clobber (match_scratch:HI 2 "=a"))] -; "TARGET_80387" -; "* return output_float_compare (insn, operands);") -; -;(define_insn "" -; [(set (cc0) -; (compare:CCFPEQ (float_extend:DF -; (match_operand:SF 0 "register_operand" "f")) -; (match_operand:DF 1 "register_operand" "f"))) -; (clobber (match_scratch:HI 2 "=a"))] -; "TARGET_80387" -; "* return output_float_compare (insn, operands);") - -(define_insn "*cmpsf_cc_1" - [(set (cc0) - (match_operator 2 "VOIDmode_compare_op" - [(match_operand:SF 0 "nonimmediate_operand" "f,fm") - (match_operand:SF 1 "nonimmediate_operand" "fm,f")])) - (clobber (match_scratch:HI 3 "=a,a"))] - "TARGET_80387 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) - -(define_insn "" - [(set (cc0) - (compare:CCFPEQ (match_operand:SF 0 "register_operand" "f") - (match_operand:SF 1 "register_operand" "f"))) - (clobber (match_scratch:HI 2 "=a"))] - "TARGET_80387" - "* return output_float_compare (insn, operands);" - [(set_attr "type" "fcompare")]) +(define_insn "*cmpsi_1_insn" + [(set (reg 17) + (compare (match_operand:SI 0 "nonimmediate_operand" "rm,r") + 
(match_operand:SI 1 "general_operand" "ri,mr")))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ix86_match_ccmode (insn, CCmode)" + "cmp{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "SI")]) + +(define_insn "*cmphi_ccno_1" + [(set (reg 17) + (compare (match_operand:HI 0 "nonimmediate_operand" "r,?mr") + (match_operand:HI 1 "const0_operand" "n,n")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{w}\t{%0, %0|%0, %0} + cmp{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "HI")]) + +(define_insn "*cmphi_minus_1" + [(set (reg 17) + (compare (minus:HI (match_operand:HI 0 "nonimmediate_operand" "rm,r") + (match_operand:HI 1 "general_operand" "ri,mr")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "HI")]) + +(define_insn "*cmphi_1" + [(set (reg 17) + (compare (match_operand:HI 0 "nonimmediate_operand" "rm,r") + (match_operand:HI 1 "general_operand" "ri,mr")))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ix86_match_ccmode (insn, CCmode)" + "cmp{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "HI")]) + +(define_insn "*cmpqi_ccno_1" + [(set (reg 17) + (compare (match_operand:QI 0 "nonimmediate_operand" "q,?mq") + (match_operand:QI 1 "const0_operand" "n,n")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{b}\t{%0, %0|%0, %0} + cmp{b}\t{$0, %0|%0, 0}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_1" + [(set (reg 17) + (compare (match_operand:QI 0 "nonimmediate_operand" "qm,q") + (match_operand:QI 1 "general_operand" "qi,mq")))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_minus_1" + 
[(set (reg 17) + (compare (minus:QI (match_operand:QI 0 "nonimmediate_operand" "qm,q") + (match_operand:QI 1 "general_operand" "qi,mq")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_1" + [(set (reg 17) + (compare + (match_operand:QI 0 "general_operand" "Qm") + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_1_rex64" + [(set (reg 17) + (compare + (match_operand:QI 0 "register_operand" "Q") + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_2" + [(set (reg 17) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "const0_operand" "n")))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t%h0, %h0" + [(set_attr "type" "test") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_expand "cmpqi_ext_3" + [(set (reg:CC 17) + (compare:CC + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "general_operand" "")))] + "" + "") + +(define_insn "cmpqi_ext_3_insn" + [(set (reg 17) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "general_operand" "Qmn")))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + 
+(define_insn "cmpqi_ext_3_insn_rex64" + [(set (reg 17) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "nonmemory_operand" "Qn")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_4" + [(set (reg 17) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +;; These implement float point compares. +;; %%% See if we can get away with VOIDmode operands on the actual insns, +;; which would allow mix and match FP modes on the compares. Which is what +;; the old patterns did, but with many more of them. 
(define_expand "cmpxf" - [(set (cc0) - (compare (match_operand:XF 0 "register_operand" "") - (match_operand:XF 1 "register_operand" "")))] + [(set (reg:CC 17) + (compare:CC (match_operand:XF 0 "cmp_fp_expander_operand" "") + (match_operand:XF 1 "cmp_fp_expander_operand" "")))] + "!TARGET_64BIT && TARGET_80387" +{ + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmptf" + [(set (reg:CC 17) + (compare:CC (match_operand:TF 0 "cmp_fp_expander_operand" "") + (match_operand:TF 1 "cmp_fp_expander_operand" "")))] "TARGET_80387" - " { - i386_compare_gen = gen_cmpxf_cc; - i386_compare_gen_eq = gen_cmpxf_ccfpeq; - i386_compare_op0 = operands[0]; - i386_compare_op1 = operands[1]; + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; DONE; -}") +}) (define_expand "cmpdf" - [(set (cc0) - (compare (match_operand:DF 0 "register_operand" "") - (match_operand:DF 1 "general_operand" "")))] - "TARGET_80387" - " + [(set (reg:CC 17) + (compare:CC (match_operand:DF 0 "cmp_fp_expander_operand" "") + (match_operand:DF 1 "cmp_fp_expander_operand" "")))] + "TARGET_80387 || TARGET_SSE2" { - i386_compare_gen = gen_cmpdf_cc; - i386_compare_gen_eq = gen_cmpdf_ccfpeq; - i386_compare_op0 = operands[0]; - i386_compare_op1 = (immediate_operand (operands[1], DFmode)) - ? copy_to_mode_reg (DFmode, operands[1]) : operands[1]; + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; DONE; -}") +}) (define_expand "cmpsf" - [(set (cc0) - (compare (match_operand:SF 0 "register_operand" "") - (match_operand:SF 1 "general_operand" "")))] - "TARGET_80387" - " + [(set (reg:CC 17) + (compare:CC (match_operand:SF 0 "cmp_fp_expander_operand" "") + (match_operand:SF 1 "cmp_fp_expander_operand" "")))] + "TARGET_80387 || TARGET_SSE" { - i386_compare_gen = gen_cmpsf_cc; - i386_compare_gen_eq = gen_cmpsf_ccfpeq; - i386_compare_op0 = operands[0]; - i386_compare_op1 = (immediate_operand (operands[1], SFmode)) - ? 
copy_to_mode_reg (SFmode, operands[1]) : operands[1]; + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; DONE; -}") +}) -(define_expand "cmpxf_cc" - [(parallel [(set (cc0) - (compare (match_operand:XF 0 "register_operand" "") - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_scratch:HI 2 ""))])] - "TARGET_80387" - "") +;; FP compares, step 1: +;; Set the FP condition codes. +;; +;; CCFPmode compare with exceptions +;; CCFPUmode compare with no exceptions -(define_expand "cmpxf_ccfpeq" - [(parallel [(set (cc0) - (compare:CCFPEQ (match_operand:XF 0 "register_operand" "") - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_scratch:HI 2 ""))])] - "TARGET_80387" - "") +;; %%% It is an unfortunate fact that ftst has no non-popping variant, +;; and that fp moves clobber the condition codes, and that there is +;; currently no way to describe this fact to reg-stack. So there are +;; no splitters yet for this. -(define_expand "cmpdf_cc" - [(parallel [(set (cc0) - (compare (match_operand:DF 0 "register_operand" "") - (match_operand:DF 1 "register_operand" ""))) - (clobber (match_scratch:HI 2 ""))])] - "TARGET_80387" - "") +;; %%% YIKES! This scheme does not retain a strong connection between +;; the real compare and the ultimate cc0 user, so CC_REVERSE does not +;; work! Only allow tos/mem with tos in op 0. +;; +;; Hmm, of course, this is what the actual _hardware_ does. Perhaps +;; things aren't as bad as they sound... 
-(define_expand "cmpdf_ccfpeq" - [(parallel [(set (cc0) - (compare:CCFPEQ (match_operand:DF 0 "register_operand" "") - (match_operand:DF 1 "register_operand" ""))) - (clobber (match_scratch:HI 2 ""))])] - "TARGET_80387" - " +(define_insn "*cmpfp_0" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" "X"))] 9))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2])" { - if (! register_operand (operands[1], DFmode)) - operands[1] = copy_to_mode_reg (DFmode, operands[1]); -}") - -(define_expand "cmpsf_cc" - [(parallel [(set (cc0) - (compare (match_operand:SF 0 "register_operand" "") - (match_operand:SF 1 "register_operand" ""))) - (clobber (match_scratch:HI 2 ""))])] + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "ftst\;fnstsw\t%0\;fstp\t%y0"; + else + return "ftst\;fnstsw\t%0"; +} + [(set_attr "type" "multi") + (set_attr "mode" "unknownfp")]) + +;; We may not use "#" to split and emit these, since the REG_DEAD notes +;; used to manage the reg stack popping would not be preserved. + +(define_insn "*cmpfp_2_sf" + [(set (reg:CCFP 18) + (compare:CCFP + (match_operand:SF 0 "register_operand" "f") + (match_operand:SF 1 "nonimmediate_operand" "fm")))] "TARGET_80387" - "") + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "fcmp") + (set_attr "mode" "SF")]) -(define_expand "cmpsf_ccfpeq" - [(parallel [(set (cc0) - (compare:CCFPEQ (match_operand:SF 0 "register_operand" "") - (match_operand:SF 1 "register_operand" ""))) - (clobber (match_scratch:HI 2 ""))])] +(define_insn "*cmpfp_2_sf_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "nonimmediate_operand" "fm"))] 9))] "TARGET_80387" - " -{ - if (! 
register_operand (operands[1], SFmode)) - operands[1] = copy_to_mode_reg (SFmode, operands[1]); -}") - -;; logical compare - -(define_insn "" - [(set (cc0) - (and:SI (match_operand:SI 0 "general_operand" "%ro") - (match_operand:SI 1 "nonmemory_operand" "ri")))] - "" - "* -{ - /* For small integers, we may actually use testb. */ - if (GET_CODE (operands[1]) == CONST_INT - && ! (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])) - && (! REG_P (operands[0]) || QI_REG_P (operands[0])) - /* A Pentium test is pairable only with eax. Not with ah or al. */ - && (! REG_P (operands[0]) || REGNO (operands[0]) || !TARGET_PENTIUM - || optimize_size)) - { - /* We may set the sign bit spuriously. */ - - if ((INTVAL (operands[1]) & ~0xff) == 0) - { - cc_status.flags |= CC_NOT_NEGATIVE; - return AS2 (test%B0,%1,%b0); - } - - if ((INTVAL (operands[1]) & ~0xff00) == 0) - { - cc_status.flags |= CC_NOT_NEGATIVE; - operands[1] = GEN_INT (INTVAL (operands[1]) >> 8); - - if (QI_REG_P (operands[0])) - return AS2 (test%B0,%1,%h0); - else - { - operands[0] = adj_offsettable_operand (operands[0], 1); - return AS2 (test%B0,%1,%b0); - } - } - - if (GET_CODE (operands[0]) == MEM - && (INTVAL (operands[1]) & ~0xff0000) == 0) - { - cc_status.flags |= CC_NOT_NEGATIVE; - operands[1] = GEN_INT (INTVAL (operands[1]) >> 16); - operands[0] = adj_offsettable_operand (operands[0], 2); - return AS2 (test%B0,%1,%b0); - } - - if (GET_CODE (operands[0]) == MEM - && (INTVAL (operands[1]) & ~0xff000000) == 0) - { - operands[1] = GEN_INT ((INTVAL (operands[1]) >> 24) & 0xff); - operands[0] = adj_offsettable_operand (operands[0], 3); - return AS2 (test%B0,%1,%b0); - } - } - - if (CONSTANT_P (operands[1]) || GET_CODE (operands[0]) == MEM) - return AS2 (test%L0,%1,%0); - - return AS2 (test%L1,%0,%1); -}" - [(set_attr "type" "compare")]) - -(define_insn "" - [(set (cc0) - (and:HI (match_operand:HI 0 "general_operand" "%ro") - (match_operand:HI 1 "nonmemory_operand" "ri")))] - "" - "* -{ - if (GET_CODE 
(operands[1]) == CONST_INT - && ! (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])) - && (! REG_P (operands[0]) || QI_REG_P (operands[0]))) - { - if ((INTVAL (operands[1]) & 0xff00) == 0) - { - /* ??? This might not be necessary. */ - if (INTVAL (operands[1]) & 0xffff0000) - operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); - - /* We may set the sign bit spuriously. */ - cc_status.flags |= CC_NOT_NEGATIVE; - return AS2 (test%B0,%1,%b0); - } - - if ((INTVAL (operands[1]) & 0xff) == 0) - { - operands[1] = GEN_INT ((INTVAL (operands[1]) >> 8) & 0xff); + "* return output_fp_compare (insn, operands, 2, 0);" + [(set_attr "type" "fcmp") + (set_attr "mode" "SF")]) + +(define_insn "*cmpfp_2_df" + [(set (reg:CCFP 18) + (compare:CCFP + (match_operand:DF 0 "register_operand" "f") + (match_operand:DF 1 "nonimmediate_operand" "fm")))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "fcmp") + (set_attr "mode" "DF")]) - if (QI_REG_P (operands[0])) - return AS2 (test%B0,%1,%h0); - else - { - operands[0] = adj_offsettable_operand (operands[0], 1); - return AS2 (test%B0,%1,%b0); - } - } - } +(define_insn "*cmpfp_2_df_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "nonimmediate_operand" "fm"))] 9))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 2, 0);" + [(set_attr "type" "multi") + (set_attr "mode" "DF")]) + +(define_insn "*cmpfp_2_xf" + [(set (reg:CCFP 18) + (compare:CCFP + (match_operand:XF 0 "register_operand" "f") + (match_operand:XF 1 "register_operand" "f")))] + "!TARGET_64BIT && TARGET_80387" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "fcmp") + (set_attr "mode" "XF")]) + +(define_insn "*cmpfp_2_tf" + [(set (reg:CCFP 18) + (compare:CCFP + (match_operand:TF 0 "register_operand" "f") + (match_operand:TF 1 "register_operand" "f")))] + "TARGET_80387" + "* return 
output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "fcmp") + (set_attr "mode" "XF")]) - /* use 32-bit test instruction if there are no sign issues */ - if (GET_CODE (operands[1]) == CONST_INT - && !(INTVAL (operands[1]) & ~0x7fff) - && i386_aligned_p (operands[0])) - return AS2 (test%L0,%1,%k0); +(define_insn "*cmpfp_2_xf_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))] 9))] + "!TARGET_64BIT && TARGET_80387" + "* return output_fp_compare (insn, operands, 2, 0);" + [(set_attr "type" "multi") + (set_attr "mode" "XF")]) + +(define_insn "*cmpfp_2_tf_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:TF 1 "register_operand" "f") + (match_operand:TF 2 "register_operand" "f"))] 9))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 2, 0);" + [(set_attr "type" "multi") + (set_attr "mode" "XF")]) + +(define_insn "*cmpfp_2u" + [(set (reg:CCFPU 18) + (compare:CCFPU + (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 0, 1);" + [(set_attr "type" "fcmp") + (set_attr "mode" "unknownfp")]) - if (CONSTANT_P (operands[1]) || GET_CODE (operands[0]) == MEM) - return AS2 (test%W0,%1,%0); +(define_insn "*cmpfp_2u_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFPU + (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))] 9))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "* return output_fp_compare (insn, operands, 2, 1);" + [(set_attr "type" "multi") + (set_attr "mode" "unknownfp")]) - return AS2 (test%W1,%0,%1); -}" - [(set_attr "type" "compare")]) +;; 
Patterns to match the SImode-in-memory ficom instructions. +;; +;; %%% Play games with accepting gp registers, as otherwise we have to +;; force them to memory during rtl generation, which is no good. We +;; can get rid of this once we teach reload to do memory input reloads +;; via pushes. + +(define_insn "*ficom_1" + [(set (reg:CCFP 18) + (compare:CCFP + (match_operand 0 "register_operand" "f,f") + (float (match_operand:SI 1 "nonimmediate_operand" "m,?r"))))] + "0 && TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (XEXP (SET_SRC (PATTERN (insn)), 1)) == GET_MODE (operands[0])" + "#") -(define_insn "" - [(set (cc0) - (and:QI (match_operand:QI 0 "nonimmediate_operand" "%qm") - (match_operand:QI 1 "nonmemory_operand" "qi")))] - "" - "* -{ - if (CONSTANT_P (operands[1]) || GET_CODE (operands[0]) == MEM) - return AS2 (test%B0,%1,%0); +;; Split the not-really-implemented gp register case into a +;; push-op-pop sequence. +;; +;; %%% This is most efficient, but am I gonna get in trouble +;; for separating cc0_setter and cc0_user? - return AS2 (test%B1,%0,%1); -}" - [(set_attr "type" "compare")]) +(define_split + [(set (reg:CCFP 18) + (compare:CCFP + (match_operand:SF 0 "register_operand" "") + (float (match_operand:SI 1 "register_operand" ""))))] + "0 && TARGET_80387 && reload_completed" + [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 1)) + (set (reg:CCFP 18) (compare:CCFP (match_dup 0) (match_dup 2))) + (parallel [(set (match_dup 1) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])] + "operands[2] = gen_rtx_MEM (Pmode, stack_pointer_rtx); + operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]);") + +;; FP compares, step 2 +;; Move the fpsw to ax. 
+
+;; Store into operand 0 -- an HImode register restricted to the "a"
+;; class -- the value of (reg 18) wrapped in unspec 9.
+(define_insn "x86_fnstsw_1"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+        (unspec:HI [(reg 18)] 9))]
+  "TARGET_80387"
+  "fnstsw\t%0"
+  [(set_attr "length" "2")
+   (set_attr "mode" "SI")
+   (set_attr "i387" "1")
+   (set_attr "ppro_uops" "few")])
+
+;; FP compares, step 3
+;; Get ax into flags, general case.
+;; This pattern is only enabled when !TARGET_64BIT.
+
+(define_insn "x86_sahf_1"
+  [(set (reg:CC 17)
+        (unspec:CC [(match_operand:HI 0 "register_operand" "a")] 10))]
+  "!TARGET_64BIT"
+  "sahf"
+  [(set_attr "length" "1")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "mode" "SI")
+   (set_attr "ppro_uops" "one")])
+
+;; Pentium Pro can do steps 1 through 3 in one go.
+;;
+;; The operands of the three patterns below are mode-less match_operands,
+;; so the insn condition is the only place that ties their modes
+;; together.  It must compare operand 0 against operand 1 (as the CCFPU
+;; *cmpfp_iu* patterns do); comparing operand 0 with itself is always
+;; true and lets mixed-mode compares match.
+
+(define_insn "*cmpfp_i"
+  [(set (reg:CCFP 17)
+        (compare:CCFP (match_operand 0 "register_operand" "f")
+                      (match_operand 1 "register_operand" "f")))]
+  "TARGET_80387 && TARGET_CMOVE
+   && !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && FLOAT_MODE_P (GET_MODE (operands[0]))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 0);"
+  [(set_attr "type" "fcmp")
+   (set_attr "mode" "unknownfp")
+   (set_attr "athlon_decode" "vector")])
+
+;; Same compare for SSE float modes; each operand may live in either an
+;; x87 ("f") or SSE ("x") register, and operand 1 may also be memory in
+;; the SSE alternative.
+(define_insn "*cmpfp_i_sse"
+  [(set (reg:CCFP 17)
+        (compare:CCFP (match_operand 0 "register_operand" "f#x,x#f")
+                      (match_operand 1 "nonimmediate_operand" "f#x,xm#f")))]
+  "TARGET_80387
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 0);"
+  [(set_attr "type" "fcmp,sse")
+   (set_attr "mode" "unknownfp")
+   (set_attr "athlon_decode" "vector")])
+
+;; SSE-register-only form; its condition does not require TARGET_80387.
+(define_insn "*cmpfp_i_sse_only"
+  [(set (reg:CCFP 17)
+        (compare:CCFP (match_operand 0 "register_operand" "x")
+                      (match_operand 1 "nonimmediate_operand" "xm")))]
+  "SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 0);"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "unknownfp")
+   (set_attr "athlon_decode" "vector")])
+
+(define_insn "*cmpfp_iu" + [(set (reg:CCFPU 17) + (compare:CCFPU (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "TARGET_80387 && TARGET_CMOVE + && !SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp") + (set_attr "mode" "unknownfp") + (set_attr "athlon_decode" "vector")]) + +(define_insn "*cmpfp_iu_sse" + [(set (reg:CCFPU 17) + (compare:CCFPU (match_operand 0 "register_operand" "f#x,x#f") + (match_operand 1 "nonimmediate_operand" "f#x,xm#f")))] + "TARGET_80387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp,sse") + (set_attr "mode" "unknownfp") + (set_attr "athlon_decode" "vector")]) + +(define_insn "*cmpfp_iu_sse_only" + [(set (reg:CCFPU 17) + (compare:CCFPU (match_operand 0 "register_operand" "x") + (match_operand 1 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "sse") + (set_attr "mode" "unknownfp") + (set_attr "athlon_decode" "vector")]) -;; move instructions. -;; There is one for each machine mode, -;; and each is preceded by a corresponding push-insn pattern -;; (since pushes are not general_operands on the 386). - -(define_insn "" - [(set (match_operand:SI 0 "push_operand" "=<") - (match_operand:SI 1 "nonmemory_operand" "rn"))] - "flag_pic" - "* return AS1 (push%L0,%1);" - [(set_attr "memory" "store")]) - -(define_insn "" - [(set (match_operand:SI 0 "push_operand" "=<") - (match_operand:SI 1 "nonmemory_operand" "ri"))] - "!flag_pic" - "* return AS1 (push%L0,%1);" - [(set_attr "memory" "store")]) - -;; On a 386, it is faster to push MEM directly. 
- -(define_insn "" - [(set (match_operand:SI 0 "push_operand" "=<") - (match_operand:SI 1 "memory_operand" "m"))] - "TARGET_PUSH_MEMORY" - "* return AS1 (push%L0,%1);" - [(set_attr "type" "memory") - (set_attr "memory" "load")]) +;; Move instructions. ;; General case of fullword move. -;; If generating PIC code and operands[1] is a symbolic CONST, emit a -;; move to get the address of the symbolic object from the GOT. - (define_expand "movsi" - [(set (match_operand:SI 0 "general_operand" "") + [(set (match_operand:SI 0 "nonimmediate_operand" "") (match_operand:SI 1 "general_operand" ""))] "" - " -{ - extern int flag_pic; + "ix86_expand_move (SImode, operands); DONE;") - if (flag_pic && SYMBOLIC_CONST (operands[1])) - emit_pic_move (operands, SImode); - - /* Don't generate memory->memory moves, go through a register */ - else if (TARGET_MOVE - && no_new_pseudos == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == MEM) - { - operands[1] = force_reg (SImode, operands[1]); - } -}") - -;; On i486, incl reg is faster than movl $1,reg. +;; Push/pop instructions. They are separate since autoinc/dec is not a +;; general_operand. +;; +;; %%% We don't use a post-inc memory reference because x86 is not a +;; general AUTO_INC_DEC host, which impacts how it is treated in flow. +;; Changing this impacts compiler performance on other non-AUTO_INC_DEC +;; targets without our curiosities, and it is just as easy to represent +;; this differently. -(define_insn "" - [(set (match_operand:SI 0 "general_operand" "=g,r,r") - (match_operand:SI 1 "general_operand" "rn,i,m"))] - "((!TARGET_MOVE || GET_CODE (operands[0]) != MEM) - || (GET_CODE (operands[1]) != MEM)) - && flag_pic" - "* +(define_insn "*pushsi2" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m"))] + "!TARGET_64BIT" + "push{l}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; For 64BIT abi we always round up to 8 bytes. 
+(define_insn "*pushsi2_rex64" + [(set (match_operand:SI 0 "push_operand" "=X") + (match_operand:SI 1 "nonmemory_no_elim_operand" "ri"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +(define_insn "*pushsi2_prologue" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m")) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "push{l}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +(define_insn "*popsi1_epilogue" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") + (mem:SI (reg:SI 7))) + (set (reg:SI 7) + (plus:SI (reg:SI 7) (const_int 4))) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "pop{l}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "SI")]) + +(define_insn "popsi1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") + (mem:SI (reg:SI 7))) + (set (reg:SI 7) + (plus:SI (reg:SI 7) (const_int 4)))] + "!TARGET_64BIT" + "pop{l}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "SI")]) + +(define_insn "*movsi_xor" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "const0_operand" "i")) + (clobber (reg:CC 17))] + "reload_completed && (!TARGET_USE_MOV0 || optimize_size)" + "xor{l}\t{%0, %0|%0, %0}" + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movsi_or" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "immediate_operand" "i")) + (clobber (reg:CC 17))] + "reload_completed && GET_CODE (operands[1]) == CONST_INT + && INTVAL (operands[1]) == -1 + && (TARGET_PENTIUM || optimize_size)" { - rtx link; - - /* K6: mov reg,0 is slightly faster than xor reg,reg but is 3 bytes - longer. */ - if ((ix86_cpu != PROCESSOR_K6 || optimize_size) - && operands[1] == const0_rtx && REG_P (operands[0])) - return AS2 (xor%L0,%0,%0); - - if (operands[1] == const1_rtx - /* PPRO and K6 prefer mov to inc to reduce dependencies. 
*/ - && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO) - && (link = find_reg_note (insn, REG_WAS_0, 0)) - /* Make sure the insn that stored the 0 is still present. */ - && ! INSN_DELETED_P (XEXP (link, 0)) - && GET_CODE (XEXP (link, 0)) != NOTE - /* Make sure cross jumping didn't happen here. */ - && no_labels_between_p (XEXP (link, 0), insn) - /* Make sure the reg hasn't been clobbered. */ - && ! reg_set_between_p (operands[0], XEXP (link, 0), insn)) - /* Fastest way to change a 0 to a 1. */ - return AS1 (inc%L0,%0); - - if (SYMBOLIC_CONST (operands[1])) - return AS2 (lea%L0,%a1,%0); - - return AS2 (mov%L0,%1,%0); -}" - [(set_attr "type" "integer,integer,memory") - (set_attr "memory" "*,*,load")]) - -(define_insn "" - [(set (match_operand:SI 0 "general_operand" "=g,r") - (match_operand:SI 1 "general_operand" "ri,m"))] - "((!TARGET_MOVE || GET_CODE (operands[0]) != MEM) - || (GET_CODE (operands[1]) != MEM)) - && !flag_pic" - "* + operands[1] = constm1_rtx; + return "or{l}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "1")]) + +(define_insn "*movsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=*a,r,*a,m,!*y,!rm,!*Y,!rm,!*Y") + (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,rm,*Y,*Y"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { - rtx link; - - /* Use of xor was disabled for AMD K6 as recommended by the Optimization - Manual. My test shows, that this generally hurts the performance, because - mov is longer and takes longer to decode and decoding is the main - bottleneck of K6 when executing GCC code. */ - - if (operands[1] == const0_rtx && REG_P (operands[0])) - return AS2 (xor%L0,%0,%0); - - if (operands[1] == const1_rtx - /* PPRO and K6 prefer mov to inc to reduce dependencies. 
*/ - && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO) - && (link = find_reg_note (insn, REG_WAS_0, 0)) - /* Make sure the insn that stored the 0 is still present. */ - && ! INSN_DELETED_P (XEXP (link, 0)) - && GET_CODE (XEXP (link, 0)) != NOTE - /* Make sure cross jumping didn't happen here. */ - && no_labels_between_p (XEXP (link, 0), insn) - /* Make sure the reg hasn't been clobbered. */ - && ! reg_set_between_p (operands[0], XEXP (link, 0), insn)) - /* Fastest way to change a 0 to a 1. */ - return AS1 (inc%L0,%0); - - return AS2 (mov%L0,%1,%0); -}" - [(set_attr "type" "integer,memory") - (set_attr "memory" "*,load")]) - -(define_insn "" - [(set (match_operand:HI 0 "push_operand" "=<") - (match_operand:HI 1 "nonmemory_operand" "ri"))] - "" - "* return AS1 (push%W0,%1);" - [(set_attr "type" "memory") - (set_attr "memory" "store")]) + switch (get_attr_type (insn)) + { + case TYPE_SSE: + if (get_attr_mode (insn) == TImode) + return "movdqa\t{%1, %0|%0, %1}"; + return "movd\t{%1, %0|%0, %1}"; -(define_insn "" - [(set (match_operand:HI 0 "push_operand" "=<") - (match_operand:HI 1 "memory_operand" "m"))] - "TARGET_PUSH_MEMORY" - "* return AS1 (push%W0,%1);" - [(set_attr "type" "memory") - (set_attr "memory" "load")]) + case TYPE_MMX: + return "movd\t{%1, %0|%0, %1}"; -;; On i486, an incl and movl are both faster than incw and movw. + case TYPE_LEA: + return "lea{l}\t{%1, %0|%0, %1}"; + + default: + if (flag_pic && SYMBOLIC_CONST (operands[1])) + abort(); + return "mov{l}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "4,5") + (const_string "mmx") + (eq_attr "alternative" "6,7,8") + (const_string "sse") + (and (ne (symbol_ref "flag_pic") (const_int 0)) + (match_operand:SI 1 "symbolic_operand" "")) + (const_string "lea") + ] + (const_string "imov"))) + (set_attr "modrm" "0,*,0,*,*,*,*,*,*") + (set_attr "mode" "SI,SI,SI,SI,SI,SI,TI,SI,SI")]) + +;; Stores and loads of ax to arbitary constant address. 
+;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabssi_1_rex64" + [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) + (match_operand:SI 1 "nonmemory_operand" "a,er,i"))] + "TARGET_64BIT" + "@ + movabs{l}\t{%1, %P0|%P0, %1} + mov{l}\t{%1, %a0|%a0, %1} + movabs{l}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*,*") + (set_attr "length_address" "8,0,0") + (set_attr "length_immediate" "0,*,*") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*movabssi_2_rex64" + [(set (match_operand:SI 0 "register_operand" "=a,r") + (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT" + "@ + movabs{l}\t{%P1, %0|%0, %P1} + mov{l}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "SI")]) + +(define_insn "*swapsi" + [(set (match_operand:SI 0 "register_operand" "+r") + (match_operand:SI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "" + "xchg{l}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "mode" "SI") + (set_attr "modrm" "0") + (set_attr "ppro_uops" "few")]) (define_expand "movhi" - [(set (match_operand:HI 0 "general_operand" "") - (match_operand:HI 1 "general_operand" ""))] + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" ""))] "" - " + "ix86_expand_move (HImode, operands); DONE;") + +(define_insn "*pushhi2" + [(set (match_operand:HI 0 "push_operand" "=<,<") + (match_operand:HI 1 "general_no_elim_operand" "n,r*m"))] + "!TARGET_64BIT" + "@ + push{w}\t{|WORD PTR }%1 + push{w}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "HI")]) + +;; For 64BIT abi we always round up to 8 bytes. 
+(define_insn "*pushhi2_rex64" + [(set (match_operand:HI 0 "push_operand" "=X") + (match_operand:HI 1 "nonmemory_no_elim_operand" "ri"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "QI")]) + +(define_insn "*movhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=*a,r,r,*a,r,m") + (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { - /* Don't generate memory->memory moves, go through a register */ - if (TARGET_MOVE - && no_new_pseudos == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == MEM) + switch (get_attr_type (insn)) { - operands[1] = force_reg (HImode, operands[1]); + case TYPE_IMOVX: + /* movzwl is faster than movw on p2 due to partial word stalls, + though not as fast as an aligned movl. */ + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{w}\t{%1, %0|%0, %1}"; } -}") +} + [(set (attr "type") + (cond [(and (eq_attr "alternative" "0,1") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (and (eq_attr "alternative" "2,3,4") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "imov") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "0,1,3,4")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "2,3,4") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "SI") + (and (eq_attr "alternative" "0,1") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "SI") + ] + (const_string "HI"))) + (set_attr "modrm" "0,*,*,0,*,*")]) + +;; Stores and loads of ax to arbitary constant address. 
+;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabshi_1_rex64" + [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) + (match_operand:HI 1 "nonmemory_operand" "a,er,i"))] + "TARGET_64BIT" + "@ + movabs{w}\t{%1, %P0|%P0, %1} + mov{w}\t{%1, %a0|%a0, %1} + movabs{w}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*,*") + (set_attr "length_address" "8,0,0") + (set_attr "length_immediate" "0,*,*") + (set_attr "memory" "store") + (set_attr "mode" "HI")]) + +(define_insn "*movabshi_2_rex64" + [(set (match_operand:HI 0 "register_operand" "=a,r") + (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT" + "@ + movabs{w}\t{%P1, %0|%0, %P1} + mov{w}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "HI")]) + +(define_insn "*swaphi_1" + [(set (match_operand:HI 0 "register_operand" "+r") + (match_operand:HI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_PARTIAL_REG_STALL" + "xchg{w}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "pent_pair" "np") + (set_attr "mode" "HI") + (set_attr "modrm" "0") + (set_attr "ppro_uops" "few")]) + +(define_insn "*swaphi_2" + [(set (match_operand:HI 0 "register_operand" "+r") + (match_operand:HI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "! 
TARGET_PARTIAL_REG_STALL" + "xchg{l}\t%k1, %k0" + [(set_attr "type" "imov") + (set_attr "pent_pair" "np") + (set_attr "mode" "SI") + (set_attr "modrm" "0") + (set_attr "ppro_uops" "few")]) -(define_insn "" - [(set (match_operand:HI 0 "general_operand" "=g,r") - (match_operand:HI 1 "general_operand" "ri,m"))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) || (GET_CODE (operands[1]) != MEM)" - "* +(define_expand "movstricthi" + [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "")) + (match_operand:HI 1 "general_operand" ""))] + "! TARGET_PARTIAL_REG_STALL || optimize_size" { - rtx link; - if (REG_P (operands[0]) && operands[1] == const0_rtx) - return AS2 (xor%L0,%k0,%k0); - - if (REG_P (operands[0]) && operands[1] == const1_rtx - /* PPRO and K6 prefer mov to inc to reduce dependencies. */ - && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO) - && (link = find_reg_note (insn, REG_WAS_0, 0)) - /* Make sure the insn that stored the 0 is still present. */ - && ! INSN_DELETED_P (XEXP (link, 0)) - && GET_CODE (XEXP (link, 0)) != NOTE - /* Make sure cross jumping didn't happen here. */ - && no_labels_between_p (XEXP (link, 0), insn) - /* Make sure the reg hasn't been clobbered. */ - && ! reg_set_between_p (operands[0], XEXP (link, 0), insn)) - /* Fastest way to change a 0 to a 1. */ - return AS1 (inc%L0,%k0); - - if (REG_P (operands[0])) - { - if (i386_aligned_p (operands[1])) - { - operands[1] = i386_sext16_if_const (operands[1]); - return AS2 (mov%L0,%k1,%k0); - } - if (! TARGET_ZERO_EXTEND_WITH_AND) - { - /* movzwl is faster than movw on the Pentium Pro, - * although not as fast as an aligned movl. 
*/ -#ifdef INTEL_SYNTAX - return AS2 (movzx,%1,%k0); -#else - return AS2 (movz%W0%L0,%1,%k0); -#endif - } - } + /* Don't generate memory->memory moves, go through a register */ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (HImode, operands[1]); +}) - return AS2 (mov%W0,%1,%0); -}" - [(set_attr "type" "integer,memory") - (set_attr "memory" "*,load")]) +(define_insn "*movstricthi_1" + [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r")) + (match_operand:HI 1 "general_operand" "rn,m"))] + "(! TARGET_PARTIAL_REG_STALL || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "mov{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "HI")]) + +(define_insn "*movstricthi_xor" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) + (match_operand:HI 1 "const0_operand" "i")) + (clobber (reg:CC 17))] + "reload_completed + && ((!TARGET_USE_MOV0 && !TARGET_PARTIAL_REG_STALL) || optimize_size)" + "xor{w}\t{%0, %0|%0, %0}" + [(set_attr "type" "alu1") + (set_attr "mode" "HI") + (set_attr "length_immediate" "0")]) -(define_expand "movstricthi" - [(set (strict_low_part (match_operand:HI 0 "general_operand" "")) - (match_operand:HI 1 "general_operand" ""))] +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" ""))] "" - " + "ix86_expand_move (QImode, operands); DONE;") + +;; emit_push_insn when it calls move_by_pieces requires an insn to +;; "push a byte". But actually we use pushw, which has the effect +;; of rounding the amount pushed up to a halfword. + +(define_insn "*pushqi2" + [(set (match_operand:QI 0 "push_operand" "=X,X") + (match_operand:QI 1 "nonmemory_no_elim_operand" "n,r"))] + "!TARGET_64BIT" + "@ + push{w}\t{|word ptr }%1 + push{w}\t%w1" + [(set_attr "type" "push") + (set_attr "mode" "HI")]) + +;; For 64BIT abi we always round up to 8 bytes. 
+(define_insn "*pushqi2_rex64" + [(set (match_operand:QI 0 "push_operand" "=X") + (match_operand:QI 1 "nonmemory_no_elim_operand" "ri"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "QI")]) + +;; Situation is quite tricky about when to choose full sized (SImode) move +;; over QImode moves. For Q_REG -> Q_REG move we use full size only for +;; partial register dependency machines (such as AMD Athlon), where QImode +;; moves issue extra dependency and for partial register stalls machines +;; that don't use QImode patterns (and QImode move cause stall on the next +;; instruction). +;; +;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial +;; register stall machines with, where we use QImode instructions, since +;; partial register stall can be caused there. Then we use movzx. +(define_insn "*movqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m") + (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))] + "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { - /* Don't generate memory->memory moves, go through a register */ - if (TARGET_MOVE - && no_new_pseudos == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == MEM) + switch (get_attr_type (insn)) { - operands[1] = force_reg (HImode, operands[1]); + case TYPE_IMOVX: + if (!ANY_QI_REG_P (operands[1]) && GET_CODE (operands[1]) != MEM) + abort (); + return "movz{bl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{b}\t{%1, %0|%0, %1}"; } -}") - -(define_insn "" - [(set (strict_low_part (match_operand:HI 0 "general_operand" "+g,r")) - (match_operand:HI 1 "general_operand" "ri,m"))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) || (GET_CODE (operands[1]) != MEM)" - "* -{ - rtx link; - - /* Use of xor was disabled for AMD K6 as recommended by the Optimization - Manual. 
My test shows, that this generally hurts the performance, because - mov is longer and takes longer to decode and decoding is the main - bottleneck of K6 when executing GCC code. */ - - if (operands[1] == const0_rtx && REG_P (operands[0])) - return AS2 (xor%W0,%0,%0); - - if (operands[1] == const1_rtx - /* PPRO and K6 prefer mov to inc to reduce dependencies. */ - && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO) - && (link = find_reg_note (insn, REG_WAS_0, 0)) - /* Make sure the insn that stored the 0 is still present. */ - && ! INSN_DELETED_P (XEXP (link, 0)) - && GET_CODE (XEXP (link, 0)) != NOTE - /* Make sure cross jumping didn't happen here. */ - && no_labels_between_p (XEXP (link, 0), insn) - /* Make sure the reg hasn't been clobbered. */ - && ! reg_set_between_p (operands[0], XEXP (link, 0), insn)) - /* Fastest way to change a 0 to a 1. */ - return AS1 (inc%W0,%0); - - return AS2 (mov%W0,%1,%0); -}" - [(set_attr "type" "integer,memory")]) - -;; emit_push_insn when it calls move_by_pieces -;; requires an insn to "push a byte". -;; But actually we use pushw, which has the effect of rounding -;; the amount pushed up to a halfword. 
-(define_insn "" - [(set (match_operand:QI 0 "push_operand" "=<") - (match_operand:QI 1 "const_int_operand" "n"))] - "" - "* return AS1(push%W0,%1);") - -(define_insn "" - [(set (match_operand:QI 0 "push_operand" "=<") - (match_operand:QI 1 "register_operand" "q"))] +} + [(set (attr "type") + (cond [(and (eq_attr "alternative" "3") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (eq_attr "alternative" "3,5") + (const_string "imovx") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,5") + (const_string "SI") + (eq_attr "alternative" "6") + (const_string "QI") + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1,2") + (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") + (const_int 0)))) + (const_string "SI") + ;; Avoid partial register stalls when not using QImode arithmetic + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1,2") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0))))) + (const_string "SI") + ] + (const_string "QI")))]) + +(define_expand "reload_outqi" + [(parallel [(match_operand:QI 0 "" "=m") + (match_operand:QI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "=&q")])] "" - "* { - operands[1] = gen_rtx_REG (HImode, REGNO (operands[1])); - return AS1 (push%W0,%1); -}") + rtx op0, op1, op2; + op0 = operands[0]; op1 = operands[1]; op2 = operands[2]; -;; On i486, incb reg is faster than movb $1,reg. + if (reg_overlap_mentioned_p (op2, op0)) + abort (); + if (! q_regs_operand (op1, QImode)) + { + emit_insn (gen_movqi (op2, op1)); + op1 = op2; + } + emit_insn (gen_movqi (op0, op1)); + DONE; +}) -;; ??? 
Do a recognizer for zero_extract that looks just like this, but reads -;; or writes %ah, %bh, %ch, %dh. +(define_insn "*swapqi" + [(set (match_operand:QI 0 "register_operand" "+r") + (match_operand:QI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "" + "xchg{b}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "pent_pair" "np") + (set_attr "mode" "QI") + (set_attr "modrm" "0") + (set_attr "ppro_uops" "few")]) -(define_expand "movqi" - [(set (match_operand:QI 0 "general_operand" "") +(define_expand "movstrictqi" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) (match_operand:QI 1 "general_operand" ""))] + "! TARGET_PARTIAL_REG_STALL" +{ + /* Don't generate memory->memory moves, go through a register. */ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (QImode, operands[1]); +}) + +(define_insn "*movstrictqi_1" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (match_operand:QI 1 "general_operand" "*qn,m"))] + "! 
TARGET_PARTIAL_REG_STALL + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "mov{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movstrictqi_xor" + [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q")) + (match_operand:QI 1 "const0_operand" "i")) + (clobber (reg:CC 17))] + "reload_completed && (!TARGET_USE_MOV0 || optimize_size)" + "xor{b}\t{%0, %0|%0, %0}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movsi_extv_1" + [(set (match_operand:SI 0 "register_operand" "=R") + (sign_extract:SI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] "" - " + "movs{bl|x}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movhi_extv_1" + [(set (match_operand:HI 0 "register_operand" "=R") + (sign_extract:HI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movs{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extv_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "!TARGET_64BIT" { - /* Don't generate memory->memory moves, go through a register */ - if (TARGET_MOVE - && no_new_pseudos == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == MEM) + switch (get_attr_type (insn)) { - operands[1] = force_reg (QImode, operands[1]); + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; } -}") - -(define_insn "" - [(set (match_operand:QI 0 "nonimmediate_operand" "=q,*r,qm") - (match_operand:QI 1 "general_operand" "*g,*rn,qn"))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) || (GET_CODE (operands[1]) != MEM)" - "* +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 
"register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extv_1_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "TARGET_64BIT" { - rtx link; - - /* movb $0,reg8 is 2 bytes, the same as xorl reg8,reg8. - It is at least as fast as xor on any processor except a Pentium. */ - - if (operands[1] == const1_rtx - && TARGET_PENTIUM - && (link = find_reg_note (insn, REG_WAS_0, 0)) - /* Make sure the insn that stored the 0 is still present. */ - && ! INSN_DELETED_P (XEXP (link, 0)) - && GET_CODE (XEXP (link, 0)) != NOTE - /* Make sure cross jumping didn't happen here. */ - && no_labels_between_p (XEXP (link, 0), insn) - /* Make sure the reg hasn't been clobbered. */ - && ! reg_set_between_p (operands[0], XEXP (link, 0), insn)) - { - /* Fastest way to change a 0 to a 1. - If inc%B0 isn't allowed, use inc%L0. */ - if (NON_QI_REG_P (operands[0])) - return AS1 (inc%L0,%k0); - else - return AS1 (inc%B0,%0); + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; } - - /* If mov%B0 isn't allowed for one of these regs, use mov%L0. */ - if (NON_QI_REG_P (operands[0]) || NON_QI_REG_P (operands[1])) - return (AS2 (mov%L0,%k1,%k0)); - - return (AS2 (mov%B0,%1,%0)); -}") - -;; If it becomes necessary to support movstrictqi into %esi or %edi, -;; use the insn sequence: -;; -;; shrdl $8,srcreg,dstreg -;; rorl $24,dstreg -;; -;; If operands[1] is a constant, then an andl/orl sequence would be -;; faster. 
- -(define_expand "movstrictqi" - [(set (strict_low_part (match_operand:QI 0 "general_operand" "")) - (match_operand:QI 1 "general_operand" ""))] +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +;; Stores and loads of ax to arbitary constant address. +;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabsqi_1_rex64" + [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) + (match_operand:QI 1 "nonmemory_operand" "a,er,i"))] + "TARGET_64BIT" + "@ + movabs{b}\t{%1, %P0|%P0, %1} + mov{b}\t{%1, %a0|%a0, %1} + movabs{b}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*,*") + (set_attr "length_address" "8,0,0") + (set_attr "length_immediate" "0,*,*") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_insn "*movabsqi_2_rex64" + [(set (match_operand:QI 0 "register_operand" "=a,r") + (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT" + "@ + movabs{b}\t{%P1, %0|%0, %P1} + mov{b}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "QI")]) + +(define_insn "*movsi_extzv_1" + [(set (match_operand:SI 0 "register_operand" "=R") + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] "" - " + "movz{bl|x}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extzv_2" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R") + (subreg:QI (zero_extract:SI (match_operand 1 
"ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "!TARGET_64BIT" { - /* Don't generate memory->memory moves, go through a register */ - if (TARGET_MOVE - && no_new_pseudos == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == MEM) + switch (get_attr_type (insn)) { - operands[1] = force_reg (QImode, operands[1]); + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; } -}") - -(define_insn "" - [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) - (match_operand:QI 1 "general_operand" "*qn,m"))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) || (GET_CODE (operands[1]) != MEM)" - "* +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extzv_2_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "TARGET_64BIT" { - rtx link; + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "movsi_insv_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:SI 1 "general_operand" "Qmn"))] + "!TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" 
+ [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movsi_insv_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:SI 1 "nonmemory_operand" "Qn"))] + "TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movqi_insv_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") + (const_int 8)) + (const_int 255)))] + "" + "mov{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) - /* movb $0,reg8 is 2 bytes, the same as xorl reg8,reg8. */ +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + "ix86_expand_move (DImode, operands); DONE;") - if (operands[1] == const1_rtx - && TARGET_PENTIUM - && ! NON_QI_REG_P (operands[0]) - && (link = find_reg_note (insn, REG_WAS_0, 0)) - /* Make sure the insn that stored the 0 is still present. */ - && ! INSN_DELETED_P (XEXP (link, 0)) - && GET_CODE (XEXP (link, 0)) != NOTE - /* Make sure cross jumping didn't happen here. */ - && no_labels_between_p (XEXP (link, 0), insn) - /* Make sure the reg hasn't been clobbered. */ - && ! reg_set_between_p (operands[0], XEXP (link, 0), insn)) - /* Fastest way to change a 0 to a 1. */ - return AS1 (inc%B0,%0); +(define_insn "*pushdi" + [(set (match_operand:DI 0 "push_operand" "=<") + (match_operand:DI 1 "general_no_elim_operand" "riF*m"))] + "!TARGET_64BIT" + "#") - /* If mov%B0 isn't allowed for one of these regs, use mov%L0. 
*/ - if (NON_QI_REG_P (operands[0]) || NON_QI_REG_P (operands[1])) - { - abort (); - return (AS2 (mov%L0,%k1,%k0)); - } +(define_insn "pushdi2_rex64" + [(set (match_operand:DI 0 "push_operand" "=<,!<") + (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))] + "TARGET_64BIT" + "@ + push{q}\t%1 + #" + [(set_attr "type" "push,multi") + (set_attr "mode" "DI")]) + +;; Convert impossible pushes of immediate to existing instructions. +;; First try to get scratch register and go through it. In case this +;; fails, push sign extended lower part first and then overwrite +;; upper part by 32bit move. +(define_peephole2 + [(match_scratch:DI 2 "r") + (set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") - return AS2 (mov%B0,%1,%0); -}") +;; We need to define this as both peepholer and splitter for case +;; peephole2 pass is not run. 
+(define_peephole2 + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode) && 1" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + "split_di (operands + 1, 1, operands + 2, operands + 3); + operands[1] = gen_lowpart (DImode, operands[2]); + operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, + GEN_INT (4))); + ") -(define_insn "movsf_push" - [(set (match_operand:SF 0 "push_operand" "=<,<") - (match_operand:SF 1 "general_operand" "*rfF,m"))] - "TARGET_PUSH_MEMORY || GET_CODE (operands[1]) != MEM - || reload_in_progress || reload_completed" - "* +(define_split + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && (flow2_completed || (reload_completed && !flag_peephole2)) + && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + "split_di (operands + 1, 1, operands + 2, operands + 3); + operands[1] = gen_lowpart (DImode, operands[2]); + operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, + GEN_INT (4))); + ") + +(define_insn "*pushdi2_prologue_rex64" + [(set (match_operand:DI 0 "push_operand" "=<") + (match_operand:DI 1 "general_no_elim_operand" "re*m")) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "push{q}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "DI")]) + +(define_insn "*popdi1_epilogue_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") + (mem:DI (reg:DI 7))) + (set (reg:DI 7) + (plus:DI (reg:DI 7) (const_int 8))) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "pop{q}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "DI")]) + +(define_insn "popdi1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") + (mem:DI (reg:DI 7))) + 
(set (reg:DI 7) + (plus:DI (reg:DI 7) (const_int 8)))] + "TARGET_64BIT" + "pop{q}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "DI")]) + +(define_insn "*movdi_xor_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "const0_operand" "i")) + (clobber (reg:CC 17))] + "TARGET_64BIT && (!TARGET_USE_MOV0 || optimize_size) + && reload_completed" + "xor{l}\t{%k0, %k0|%k0, %k0}" + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movdi_or_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "const_int_operand" "i")) + (clobber (reg:CC 17))] + "TARGET_64BIT && (TARGET_PENTIUM || optimize_size) + && reload_completed + && GET_CODE (operands[1]) == CONST_INT + && INTVAL (operands[1]) == -1" { - if (STACK_REG_P (operands[1])) - { - rtx xops[3]; - - if (! STACK_TOP_P (operands[1])) - abort (); + operands[1] = constm1_rtx; + return "or{q}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "alu1") + (set_attr "mode" "DI") + (set_attr "length_immediate" "1")]) + +(define_insn "*movdi_2" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y,!m,*Y,!*Y") + (match_operand:DI 1 "general_operand" "riFo,riF,*y,m,*Y,*Y,m"))] + "!TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + # + # + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdqa\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" + [(set_attr "type" "*,*,mmx,mmx,sse,sse,sse") + (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI")]) - xops[0] = AT_SP (SFmode); - xops[1] = GEN_INT (4); - xops[2] = stack_pointer_rtx; +(define_split + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "!TARGET_64BIT && reload_completed + && (! 
MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") - output_asm_insn (AS2 (sub%L2,%1,%2), xops); +;; %%% This multiword shite has got to go. +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "!TARGET_64BIT && reload_completed + && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0])) + && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") - if (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG)) - output_asm_insn (AS1 (fstp%S0,%0), xops); +(define_insn "*movdi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!m*y,!*y,!*Y,!m,!*Y") + (match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,m,*Y,*Y,*m"))] + "TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSE: + if (register_operand (operands[0], DImode) + && register_operand (operands[1], DImode)) + return "movdqa\t{%1, %0|%0, %1}"; + /* FALLTHRU */ + case TYPE_MMX: + return "movq\t{%1, %0|%0, %1}"; + case TYPE_MULTI: + return "#"; + case TYPE_LEA: + return "lea{q}\t{%a1, %0|%0, %a1}"; + default: + if (flag_pic && SYMBOLIC_CONST (operands[1])) + abort (); + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else if (which_alternative == 2) + return "movabs{q}\t{%1, %0|%0, %1}"; else - output_asm_insn (AS1 (fst%S0,%0), xops); - - RET; + return "mov{q}\t{%1, %0|%0, %1}"; } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "5,6") + (const_string "mmx") + (eq_attr "alternative" "7,8") + (const_string "sse") + (eq_attr "alternative" "4") + (const_string "multi") + (and (ne (symbol_ref "flag_pic") (const_int 0)) + (match_operand:DI 1 "symbolic_operand" "")) + (const_string "lea") + ] + (const_string "imov"))) + (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*") + (set_attr "length_immediate" 
"*,4,8,*,*,*,*,*,*,*") + (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI")]) + +;; Stores and loads of ax to arbitary constant address. +;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabsdi_1_rex64" + [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) + (match_operand:DI 1 "nonmemory_operand" "a,er,i"))] + "TARGET_64BIT" + "@ + movabs{q}\t{%1, %P0|%P0, %1} + mov{q}\t{%1, %a0|%a0, %1} + movabs{q}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*,*") + (set_attr "length_address" "8,0,0") + (set_attr "length_immediate" "0,*,*") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +(define_insn "*movabsdi_2_rex64" + [(set (match_operand:DI 0 "register_operand" "=a,r") + (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT" + "@ + movabs{q}\t{%P1, %0|%0, %P1} + mov{q}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "DI")]) + +;; Convert impossible stores of immediate to existing instructions. +;; First try to get scratch register and go through it. In case this +;; fails, move by 32bit parts. +(define_peephole2 + [(match_scratch:DI 2 "r") + (set (match_operand:DI 0 "memory_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") - return AS1 (push%L0,%1); -}") +;; We need to define this as both peepholer and splitter for case +;; peephole2 pass is not run. 
+(define_peephole2 + [(set (match_operand:DI 0 "memory_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode) && 1" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + "split_di (operands, 2, operands + 2, operands + 4);") (define_split - [(set (match_operand:SF 0 "push_operand" "") - (match_operand:SF 1 "general_operand" ""))] - "reload_completed && STACK_REG_P (operands[1])" - [(set (reg:SI 7) - (minus:SI (reg:SI 7) (const_int 4))) - (set (mem:SF (reg:SI 7)) - (match_dup 1))] - "") + [(set (match_operand:DI 0 "memory_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && (flow2_completed || (reload_completed && !flag_peephole2)) + && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + "split_di (operands, 2, operands + 2, operands + 4);") + +(define_insn "*swapdi_rex64" + [(set (match_operand:DI 0 "register_operand" "+r") + (match_operand:DI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_64BIT" + "xchg{q}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "mode" "DI") + (set_attr "modrm" "0") + (set_attr "ppro_uops" "few")]) + (define_expand "movsf" - [(set (match_operand:SF 0 "general_operand" "") + [(set (match_operand:SF 0 "nonimmediate_operand" "") (match_operand:SF 1 "general_operand" ""))] "" - " + "ix86_expand_move (SFmode, operands); DONE;") + +(define_insn "*pushsf" + [(set (match_operand:SF 0 "push_operand" "=<,<,<") + (match_operand:SF 1 "general_no_elim_operand" "f#rx,rFm#fx,x#rf"))] + "!TARGET_64BIT" { - /* Don't generate memory->memory moves, go through a register */ - if (TARGET_MOVE - && no_new_pseudos == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == 
MEM) + switch (which_alternative) { - operands[1] = force_reg (SFmode, operands[1]); - } + case 0: + /* %%% We loose REG_DEAD notes for controling pops if we split late. */ + operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx); + operands[2] = stack_pointer_rtx; + operands[3] = GEN_INT (4); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + else + return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - /* If we are loading a floating point constant that isn't 0 or 1 - into a register, force the value to memory now, since we'll - get better code out the back end. */ - else if ((reload_in_progress | reload_completed) == 0 - && GET_CODE (operands[0]) != MEM - && GET_CODE (operands[1]) == CONST_DOUBLE - && !standard_80387_constant_p (operands[1])) - { - operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); - } -}") + case 1: + return "push{l}\t%1"; + case 2: + return "#"; -;; For the purposes of regclass, prefer FLOAT_REGS. -(define_insn "" - [(set (match_operand:SF 0 "nonimmediate_operand" "=f,m,!*r,!m") - (match_operand:SF 1 "general_operand" "fmG,f,*rmF,*rF"))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) || (GET_CODE (operands[1]) != MEM)" - "* + default: + abort (); + } +} + [(set_attr "type" "multi,push,multi") + (set_attr "mode" "SF,SI,SF")]) + +(define_insn "*pushsf_rex64" + [(set (match_operand:SF 0 "push_operand" "=X,X,X") + (match_operand:SF 1 "nonmemory_no_elim_operand" "f#rx,rF#fx,x#rf"))] + "TARGET_64BIT" { - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; - - /* First handle a `pop' insn or a `fld %st(0)' */ - - if (STACK_TOP_P (operands[0]) && STACK_TOP_P (operands[1])) + switch (which_alternative) { - if (stack_top_dies) - return AS1 (fstp,%y0); + case 0: + /* %%% We loose REG_DEAD notes for controling pops if we split late. 
*/ + operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx); + operands[2] = stack_pointer_rtx; + operands[3] = GEN_INT (8); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; else - return AS1 (fld,%y0); - } + return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - /* Handle other kinds of writes from the 387 */ + case 1: + return "push{q}\t%q1"; - if (STACK_TOP_P (operands[1])) - { - if (stack_top_dies) - return AS1 (fstp%z0,%y0); - else - return AS1 (fst%z0,%y0); + case 2: + return "#"; + + default: + abort (); } +} + [(set_attr "type" "multi,push,multi") + (set_attr "mode" "SF,DI,SF")]) - /* Handle other kinds of reads to the 387 */ +(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "memory_operand" ""))] + "reload_completed + && GET_CODE (operands[1]) == MEM + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))" + [(set (match_dup 0) + (match_dup 1))] + "operands[1] = get_pool_constant (XEXP (operands[1], 0));") - if (STACK_TOP_P (operands[0]) && GET_CODE (operands[1]) == CONST_DOUBLE) - return output_move_const_single (operands); - if (STACK_TOP_P (operands[0])) - return AS1 (fld%z1,%y1); +;; %%% Kill this when call knows how to work this out. 
+(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "register_operand" ""))] + "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) + (set (mem:SF (reg:SI 7)) (match_dup 1))]) - /* Handle all SFmode moves not involving the 387 */ +(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "register_operand" ""))] + "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) + (set (mem:SF (reg:DI 7)) (match_dup 1))]) + +(define_insn "*movsf_1" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m") + (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,H,x,xm#rf,x#rf"))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], SFmode))" +{ + switch (which_alternative) + { + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; + + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; - return singlemove_string (operands); -}" - [(set_attr "type" "fld")]) + case 2: + switch (standard_80387_constant_p (operands[1])) + { + case 1: + return "fldz"; + case 2: + return "fld1"; + } + abort(); + + case 3: + case 4: + return "mov{l}\t{%1, %0|%0, %1}"; + case 5: + return "pxor\t%0, %0"; + case 6: + if (TARGET_PARTIAL_REG_DEPENDENCY) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movss\t{%1, %0|%0, %1}"; + case 7: + case 8: + return "movss\t{%1, %0|%0, %1}"; + default: + abort(); + } +} + [(set_attr "type" "fmov,fmov,fmov,imov,imov,sse,sse,sse,sse") + 
(set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF")]) -(define_insn "swapsf" - [(set (match_operand:SF 0 "register_operand" "f") - (match_operand:SF 1 "register_operand" "f")) +(define_insn "*swapsf" + [(set (match_operand:SF 0 "register_operand" "+f") + (match_operand:SF 1 "register_operand" "+f")) (set (match_dup 1) (match_dup 0))] - "" - "* + "reload_completed || !TARGET_SSE" { if (STACK_TOP_P (operands[0])) - return AS1 (fxch,%1); + return "fxch\t%1"; else - return AS1 (fxch,%0); -}") + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "SF")]) +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + "ix86_expand_move (DFmode, operands); DONE;") -(define_insn "movdf_push" - [(set (match_operand:DF 0 "push_operand" "=<,<") - (match_operand:DF 1 "general_operand" "*rfF,o"))] - "TARGET_PUSH_MEMORY || GET_CODE (operands[1]) != MEM - || reload_in_progress || reload_completed" - "* +;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushdf using integer insturctions is 2+2*memory operand size +;; On the average, pushdf using integers can be still shorter. Allow this +;; pattern for optimize_size too. + +(define_insn "*pushdf_nointeger" + [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f#Y,Fo#fY,*r#fY,Y#f"))] + "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES" { - if (STACK_REG_P (operands[1])) + switch (which_alternative) { - rtx xops[3]; - - xops[0] = AT_SP (DFmode); - xops[1] = GEN_INT (8); - xops[2] = stack_pointer_rtx; + case 0: + /* %%% We loose REG_DEAD notes for controling pops if we split late. 
*/ + operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx); + operands[2] = stack_pointer_rtx; + operands[3] = GEN_INT (8); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + else + return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - output_asm_insn (AS2 (sub%L2,%1,%2), xops); + case 1: + case 2: + case 3: + return "#"; - if (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG)) - output_asm_insn (AS1 (fstp%Q0,%0), xops); + default: + abort (); + } +} + [(set_attr "type" "multi") + (set_attr "mode" "DF,SI,SI,DF")]) + +(define_insn "*pushdf_integer" + [(set (match_operand:DF 0 "push_operand" "=<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f#rY,rFo#fY,Y#rf"))] + "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" +{ + switch (which_alternative) + { + case 0: + /* %%% We loose REG_DEAD notes for controling pops if we split late. */ + operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx); + operands[2] = stack_pointer_rtx; + operands[3] = GEN_INT (8); + if (TARGET_64BIT) + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + else + return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; else - output_asm_insn (AS1 (fst%Q0,%0), xops); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + else + return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - RET; - } - if (which_alternative == 1) - return output_move_pushmem (operands, insn, GET_MODE_SIZE (DFmode), 0, 0); + case 1: + case 2: + return "#"; - return output_move_double (operands); -}") + default: + abort (); + } +} + [(set_attr "type" "multi") + (set_attr "mode" "DF,SI,DF")]) +;; %%% Kill this when call knows how to work this out. 
(define_split [(set (match_operand:DF 0 "push_operand" "") - (match_operand:DF 1 "register_operand" ""))] - "reload_completed && STACK_REG_P (operands[1])" - [(set (reg:SI 7) - (minus:SI (reg:SI 7) (const_int 8))) - (set (mem:DF (reg:SI 7)) - (match_dup 1))] + (match_operand:DF 1 "register_operand" ""))] + "!TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (set (mem:DF (reg:SI 7)) (match_dup 1))] "") -(define_expand "movdf" - [(set (match_operand:DF 0 "general_operand" "") +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "register_operand" ""))] + "TARGET_64BIT && reload_completed && ANY_FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) + (set (mem:DF (reg:DI 7)) (match_dup 1))] + "") + +(define_split + [(set (match_operand:DF 0 "push_operand" "") (match_operand:DF 1 "general_operand" ""))] - "" - " + "reload_completed" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; Moving is usually shorter when only FP registers are used. This separate +;; movdf pattern avoids the use of integer registers for FP operations +;; when optimizing for size. 
+ +(define_insn "*movdf_nointeger" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m") + (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,H,Y#f,YHm#f,Y#f"))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" { - /* Don't generate memory->memory moves, go through a register */ - if (TARGET_MOVE - && no_new_pseudos == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == MEM) - { - operands[1] = force_reg (DFmode, operands[1]); - } - - /* If we are loading a floating point constant that isn't 0 or 1 into a - register, indicate we need the pic register loaded. This could be - optimized into stores of constants if the target eventually moves to - memory, but better safe than sorry. */ - else if ((reload_in_progress | reload_completed) == 0 - && GET_CODE (operands[0]) != MEM - && GET_CODE (operands[1]) == CONST_DOUBLE - && !standard_80387_constant_p (operands[1])) + switch (which_alternative) { - operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); - } -}") - -;; For the purposes of regclass, prefer FLOAT_REGS. 
-(define_insn "" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,!*r,!o") - (match_operand:DF 1 "general_operand" "fmG,f,*roF,*rF"))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) - || (GET_CODE (operands[1]) != MEM)" - "* -{ - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; - - /* First handle a `pop' insn or a `fld %st(0)' */ + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; - if (STACK_TOP_P (operands[0]) && STACK_TOP_P (operands[1])) - { - if (stack_top_dies) - return AS1 (fstp,%y0); + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; else - return AS1 (fld,%y0); - } + return "fst%z0\t%y0"; - /* Handle other kinds of writes from the 387 */ + case 2: + switch (standard_80387_constant_p (operands[1])) + { + case 1: + return "fldz"; + case 2: + return "fld1"; + } + abort(); + + case 3: + case 4: + return "#"; + case 5: + return "pxor\t%0, %0"; + case 6: + if (TARGET_PARTIAL_REG_DEPENDENCY) + return "movapd\t{%1, %0|%0, %1}"; + else + return "movsd\t{%1, %0|%0, %1}"; + case 7: + case 8: + return "movsd\t{%1, %0|%0, %1}"; - if (STACK_TOP_P (operands[1])) + default: + abort(); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse") + (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + +(define_insn "*movdf_integer" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m") + (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,H,Y#rf,Ym#rf,Y#rf"))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && !optimize_size && TARGET_INTEGER_DFMODE_MOVES + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" +{ + switch 
(which_alternative) { - if (stack_top_dies) - return AS1 (fstp%z0,%y0); + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; else - return AS1 (fst%z0,%y0); - } + return "fst\t%y0"; - /* Handle other kinds of reads to the 387 */ - - if (STACK_TOP_P (operands[0]) && GET_CODE (operands[1]) == CONST_DOUBLE) - return output_move_const_single (operands); + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; - if (STACK_TOP_P (operands[0])) - return AS1 (fld%z1,%y1); + case 2: + switch (standard_80387_constant_p (operands[1])) + { + case 1: + return "fldz"; + case 2: + return "fld1"; + } + abort(); - /* Handle all DFmode moves not involving the 387 */ + case 3: + case 4: + return "#"; - return output_move_double (operands); -}" - [(set_attr "type" "fld")]) + case 5: + return "pxor\t%0, %0"; + case 6: + if (TARGET_PARTIAL_REG_DEPENDENCY) + return "movapd\t{%1, %0|%0, %1}"; + else + return "movsd\t{%1, %0|%0, %1}"; + case 7: + case 8: + return "movsd\t{%1, %0|%0, %1}"; + default: + abort(); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sse,sse,sse,sse") + (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) +(define_split + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "reload_completed + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ! (ANY_FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[0])))) + && ! 
(ANY_FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") -(define_insn "swapdf" - [(set (match_operand:DF 0 "register_operand" "f") - (match_operand:DF 1 "register_operand" "f")) +(define_insn "*swapdf" + [(set (match_operand:DF 0 "register_operand" "+f") + (match_operand:DF 1 "register_operand" "+f")) (set (match_dup 1) (match_dup 0))] - "" - "* + "reload_completed || !TARGET_SSE2" { if (STACK_TOP_P (operands[0])) - return AS1 (fxch,%1); + return "fxch\t%1"; else - return AS1 (fxch,%0); -}") + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "DF")]) -(define_insn "movxf_push" - [(set (match_operand:XF 0 "push_operand" "=<,<") - (match_operand:XF 1 "general_operand" "*rfF,o"))] - "TARGET_PUSH_MEMORY || GET_CODE (operands[1]) != MEM - || reload_in_progress || reload_completed" - "* +(define_expand "movxf" + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "!TARGET_64BIT" + "ix86_expand_move (XFmode, operands); DONE;") + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "" + "ix86_expand_move (TFmode, operands); DONE;") + +;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushdf using integer insturctions is 3+3*memory operand size +;; Pushing using integer instructions is longer except for constants +;; and direct memory references. +;; (assuming that any given constant is pushed only once, but this ought to be +;; handled elsewhere). 
+ +(define_insn "*pushxf_nointeger" + [(set (match_operand:XF 0 "push_operand" "=X,X,X") + (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] + "!TARGET_64BIT && optimize_size" { - if (STACK_REG_P (operands[1])) + switch (which_alternative) { - rtx xops[3]; + case 0: + /* %%% We loose REG_DEAD notes for controling pops if we split late. */ + operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); + operands[2] = stack_pointer_rtx; + operands[3] = GEN_INT (12); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + else + return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - xops[0] = AT_SP (XFmode); - xops[1] = GEN_INT (12); - xops[2] = stack_pointer_rtx; + case 1: + case 2: + return "#"; - output_asm_insn (AS2 (sub%L2,%1,%2), xops); + default: + abort (); + } +} + [(set_attr "type" "multi") + (set_attr "mode" "XF,SI,SI")]) + +(define_insn "*pushtf_nointeger" + [(set (match_operand:TF 0 "push_operand" "=<,<,<") + (match_operand:TF 1 "general_no_elim_operand" "f,Fo,*r"))] + "optimize_size" +{ + switch (which_alternative) + { + case 0: + /* %%% We loose REG_DEAD notes for controling pops if we split late. */ + operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); + operands[2] = stack_pointer_rtx; + operands[3] = GEN_INT (16); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + else + return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - output_asm_insn (AS1 (fstp%T0,%0), xops); - if (! 
find_regno_note (insn, REG_DEAD, FIRST_STACK_REG)) - output_asm_insn (AS1 (fld%T0,%0), xops); + case 1: + case 2: + return "#"; - RET; + default: + abort (); } +} + [(set_attr "type" "multi") + (set_attr "mode" "XF,SI,SI")]) - if (which_alternative == 1) - return output_move_pushmem (operands, insn, GET_MODE_SIZE (XFmode), 0, 0); +(define_insn "*pushxf_integer" + [(set (match_operand:XF 0 "push_operand" "=<,<") + (match_operand:XF 1 "general_no_elim_operand" "f#r,ro#f"))] + "!TARGET_64BIT && !optimize_size" +{ + switch (which_alternative) + { + case 0: + /* %%% We loose REG_DEAD notes for controling pops if we split late. */ + operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); + operands[2] = stack_pointer_rtx; + operands[3] = GEN_INT (12); + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + else + return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; + + case 1: + return "#"; + + default: + abort (); + } +} + [(set_attr "type" "multi") + (set_attr "mode" "XF,SI")]) + +(define_insn "*pushtf_integer" + [(set (match_operand:TF 0 "push_operand" "=<,<") + (match_operand:TF 1 "general_no_elim_operand" "f#r,rFo#f"))] + "!optimize_size" +{ + switch (which_alternative) + { + case 0: + /* %%% We loose REG_DEAD notes for controling pops if we split late. 
*/ + operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); + operands[2] = stack_pointer_rtx; + operands[3] = GEN_INT (16); + if (TARGET_64BIT) + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + else + return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; + else + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; + else + return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - return output_move_double (operands); - }") + case 1: + return "#"; + + default: + abort (); + } +} + [(set_attr "type" "multi") + (set_attr "mode" "XF,SI")]) + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "general_operand" ""))] + "reload_completed + && (GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == DFmode) + && (!REG_P (operands[1]) || !ANY_FP_REGNO_P (REGNO (operands[1])))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") (define_split [(set (match_operand:XF 0 "push_operand" "") - (match_operand:XF 1 "register_operand" ""))] - "reload_completed && STACK_REG_P (operands[1])" - [(set (reg:SI 7) - (minus:SI (reg:SI 7) (const_int 12))) - (set (mem:XF (reg:SI 7)) - (match_dup 1))] - "") + (match_operand:XF 1 "register_operand" ""))] + "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) + (set (mem:XF (reg:SI 7)) (match_dup 1))]) -(define_expand "movxf" - [(set (match_operand:XF 0 "general_operand" "") - (match_operand:XF 1 "general_operand" ""))] - "" - " +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "register_operand" ""))] + "!TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:TF (reg:SI 7)) (match_dup 1))]) + +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "register_operand" ""))] 
+ "TARGET_64BIT && ANY_FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) + (set (mem:TF (reg:DI 7)) (match_dup 1))]) + +;; Do not use integer registers when optimizing for size +(define_insn "*movxf_nointeger" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] + "!TARGET_64BIT + && optimize_size + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && (reload_in_progress || reload_completed + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" { - /* Don't generate memory->memory moves, go through a register */ - if (TARGET_MOVE - && no_new_pseudos == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == MEM) + switch (which_alternative) { - operands[1] = force_reg (XFmode, operands[1]); - } + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; - /* If we are loading a floating point constant that isn't 0 or 1 - into a register, indicate we need the pic register loaded. This could - be optimized into stores of constants if the target eventually moves - to memory, but better safe than sorry. */ - else if ((reload_in_progress | reload_completed) == 0 - && GET_CODE (operands[0]) != MEM - && GET_CODE (operands[1]) == CONST_DOUBLE - && !standard_80387_constant_p (operands[1])) - { - operands[1] = validize_mem (force_const_mem (XFmode, operands[1])); - } -}") + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\;fld%z0\t%y0"; + else + return "fstp%z0\t%y0"; + case 2: + switch (standard_80387_constant_p (operands[1])) + { + case 1: + return "fldz"; + case 2: + return "fld1"; + } + break; -(define_insn "" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,!*r,!o") - (match_operand:XF 1 "general_operand" "fmG,f,*roF,*rF"))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) - || (GET_CODE (operands[1]) != MEM)" - "* + case 3: case 4: + return "#"; + } + abort(); +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +(define_insn "*movtf_nointeger" + [(set (match_operand:TF 0 "nonimmediate_operand" "=f,m,f,*r,o") + (match_operand:TF 1 "general_operand" "fm,f,G,*roF,F*r"))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && optimize_size + && (reload_in_progress || reload_completed + || GET_CODE (operands[1]) != CONST_DOUBLE + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || memory_operand (operands[0], TFmode))" { - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; + switch (which_alternative) + { + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; - /* First handle a `pop' insn or a `fld %st(0)' */ + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\;fld%z0\t%y0"; + else + return "fstp%z0\t%y0"; + + case 2: + switch (standard_80387_constant_p (operands[1])) + { + case 1: + return "fldz"; + case 2: + return "fld1"; + } + break; - if (STACK_TOP_P (operands[0]) && STACK_TOP_P (operands[1])) + case 3: case 4: + return "#"; + } + abort(); +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +(define_insn "*movxf_integer" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o") + (match_operand:XF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))] + "!TARGET_64BIT + && !optimize_size + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && (reload_in_progress || reload_completed + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" +{ + switch (which_alternative) { - if (stack_top_dies) - return AS1 (fstp,%y0); + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; else - return AS1 (fld,%y0); - } + return "fst\t%y0"; - /* Handle other kinds of writes from the 387 */ + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\;fld%z0\t%y0"; + else + return "fstp%z0\t%y0"; - if (STACK_TOP_P (operands[1])) - { - output_asm_insn (AS1 (fstp%z0,%y0), operands); - if (! 
stack_top_dies) - return AS1 (fld%z0,%y0); + case 2: + switch (standard_80387_constant_p (operands[1])) + { + case 1: + return "fldz"; + case 2: + return "fld1"; + } + break; - RET; + case 3: case 4: + return "#"; } + abort(); +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +(define_insn "*movtf_integer" + [(set (match_operand:TF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o") + (match_operand:TF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && !optimize_size + && (reload_in_progress || reload_completed + || GET_CODE (operands[1]) != CONST_DOUBLE + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || memory_operand (operands[0], TFmode))" +{ + switch (which_alternative) + { + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; - /* Handle other kinds of reads to the 387 */ + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\;fld%z0\t%y0"; + else + return "fstp%z0\t%y0"; - if (STACK_TOP_P (operands[0]) && GET_CODE (operands[1]) == CONST_DOUBLE) - return output_move_const_single (operands); + case 2: + switch (standard_80387_constant_p (operands[1])) + { + case 1: + return "fldz"; + case 2: + return "fld1"; + } + break; - if (STACK_TOP_P (operands[0])) - return AS1 (fld%z1,%y1); + case 3: case 4: + return "#"; + } + abort(); +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) - /* Handle all XFmode moves not involving the 387 */ +(define_split + [(set (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "general_operand" ""))] + "reload_completed + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == TFmode) + && ! (ANY_FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[0])))) + && ! 
(ANY_FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") - return output_move_double (operands); -}") +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "memory_operand" ""))] + "reload_completed + && GET_CODE (operands[1]) == MEM + && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode) + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)) + && (!(SSE_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (SUBREG_REG (operands[0])))) + || standard_sse_constant_p (get_pool_constant (XEXP (operands[1], 0)))) + && (!(FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && FP_REG_P (SUBREG_REG (operands[0])))) + || standard_80387_constant_p (get_pool_constant (XEXP (operands[1], 0))))" + [(set (match_dup 0) + (match_dup 1))] + "operands[1] = get_pool_constant (XEXP (operands[1], 0));") -(define_insn "swapxf" - [(set (match_operand:XF 0 "register_operand" "f") - (match_operand:XF 1 "register_operand" "f")) +(define_insn "swapxf" + [(set (match_operand:XF 0 "register_operand" "+f") + (match_operand:XF 1 "register_operand" "+f")) (set (match_dup 1) (match_dup 0))] "" - "* { if (STACK_TOP_P (operands[0])) - return AS1 (fxch,%1); + return "fxch\t%1"; else - return AS1 (fxch,%0); -}") - -(define_insn "" - [(set (match_operand:DI 0 "push_operand" "=<") - (match_operand:DI 1 "general_operand" "riF"))] - "" - "* return output_move_double (operands);") - -(define_insn "" - [(set (match_operand:DI 0 "push_operand" "=<") - (match_operand:DI 1 "memory_operand" "o"))] - "TARGET_PUSH_MEMORY" - "* return output_move_pushmem (operands, insn, GET_MODE_SIZE (DImode),0,0);") - -(define_expand "movdi" - [(set (match_operand:DI 0 "general_operand" "") - (match_operand:DI 
1 "general_operand" ""))] + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "XF")]) + +(define_insn "swaptf" + [(set (match_operand:TF 0 "register_operand" "+f") + (match_operand:TF 1 "register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] "" - " { - /* Don't generate memory->memory moves, go through a register */ - if (TARGET_MOVE - && no_new_pseudos == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == MEM) - { - operands[1] = force_reg (DImode, operands[1]); - } -}") - -(define_insn "" - [(set (match_operand:DI 0 "general_operand" "=g,r") - (match_operand:DI 1 "general_operand" "riF,m"))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) - || (GET_CODE (operands[1]) != MEM)" - "* return output_move_double (operands);" - [(set_attr "type" "integer,memory") - (set_attr "memory" "*,load")]) - -(define_split - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (match_operand:DI 1 "general_operand" ""))] - "reload_completed - && (offsettable_memref_p (operands[0]) - || nonmemory_operand (operands[0], DImode)) - && (offsettable_memref_p (operands[1]) - || nonmemory_operand (operands[1], DImode)) - && (! reg_overlap_mentioned_p (gen_lowpart (SImode, operands[0]), - operands[1]) - || ! 
reg_overlap_mentioned_p (gen_highpart (SImode, operands[0]), - operands[1]))" - [(set (match_dup 2) - (match_dup 4)) - (set (match_dup 3) - (match_dup 5))] - " -{ - split_di (&operands[0], 1, &operands[2], &operands[3]); - split_di (&operands[1], 1, &operands[4], &operands[5]); - - if (reg_overlap_mentioned_p (operands[2], operands[1])) - { - rtx tmp; - - tmp = operands[2]; - operands[2] = operands[3]; - operands[3] = tmp; - - tmp = operands[4]; - operands[4] = operands[5]; - operands[5] = tmp; - } -}") + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "XF")]) -;;- conversion instructions -;;- NONE - -;;- zero extension instructions -;; See comments by `andsi' for when andl is faster than movzx. +;; Zero extension instructions (define_expand "zero_extendhisi2" [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] "" - "") - -;; When optimizing for the PPro/PII or code size, always use movzwl. -;; We want to use a different pattern so we can use different constraints -;; than the generic pattern. -(define_insn "" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))] - "(optimize_size || (int)ix86_cpu == (int)PROCESSOR_PENTIUMPRO)" - "* return AS2 (movz%W0%L0,%1,%0);") - -(define_insn "" - [(set (match_operand:SI 0 "register_operand" "=r,&r,?r") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "0,rm,rm")))] - "! 
(optimize_size || (int)ix86_cpu == (int)PROCESSOR_PENTIUMPRO)" - "* - { - rtx xops[2]; - - if ((TARGET_ZERO_EXTEND_WITH_AND || REGNO (operands[0]) == 0) - && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])) - { - xops[0] = operands[0]; - xops[1] = GEN_INT (0xffff); - output_asm_insn (AS2 (and%L0,%1,%k0), xops); - RET; - } - if (TARGET_ZERO_EXTEND_WITH_AND && !reg_overlap_mentioned_p (operands[0], operands[1])) - { - output_asm_insn (AS2 (xor%L0,%0,%0),operands); - output_asm_insn (AS2 (mov%W0,%1,%w0),operands); - RET; - } - - if (TARGET_ZERO_EXTEND_WITH_AND) +{ + if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) { - xops[0] = operands[0]; - xops[1] = GEN_INT (0xffff); - if (i386_aligned_p (operands[1])) - output_asm_insn (AS2 (mov%L0,%k1,%k0),operands); - else - output_asm_insn (AS2 (mov%W0,%1,%w0),operands); - output_asm_insn (AS2 (and%L0,%1,%k0), xops); - RET; + operands[1] = force_reg (HImode, operands[1]); + emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1])); + DONE; } +}) -#ifdef INTEL_SYNTAX - return AS2 (movzx,%1,%0); -#else - return AS2 (movz%W0%L0,%1,%0); -#endif -}") +(define_insn "zero_extendhisi2_and" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))) + (clobber (reg:CC 17))] + "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) (define_split [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] - "reload_completed && TARGET_ZERO_EXTEND_WITH_AND && !reg_overlap_mentioned_p (operands[0], operands[1])" - [(set (match_dup 0) - (const_int 0)) - (set (strict_low_part (match_dup 2)) - (match_dup 1))] - "operands[2] = gen_rtx_REG (HImode, true_regnum (operands[0]));") - + (zero_extend:SI (match_operand:HI 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed && TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + [(parallel [(set 
(match_dup 0) (and:SI (match_dup 0) (const_int 65535))) + (clobber (reg:CC 17))])] + "") -(define_split - [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:HI 1 "memory_operand" "")))] - "reload_completed && TARGET_ZERO_EXTEND_WITH_AND && reg_overlap_mentioned_p (operands[0], operands[1])" - [(set (strict_low_part (match_dup 2)) - (match_dup 1)) - (set (match_dup 0) - (and:SI (match_dup 0) - (const_int 65535)))] - "operands[2] = gen_rtx_REG (HImode, true_regnum (operands[0]));") +(define_insn "*zero_extendhisi2_movzwl" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size" + "movz{wl|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) (define_expand "zero_extendqihi2" - [(set (match_operand:HI 0 "register_operand" "") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))] + [(parallel + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] "" "") -(define_insn "" - [(set (match_operand:HI 0 "register_operand" "=r") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] - "optimize_size || (int)ix86_cpu == (int)PROCESSOR_PENTIUMPRO" - - "* return AS2 (movz%B0%W0,%1,%0);") +(define_insn "*zero_extendqihi2_and" + [(set (match_operand:HI 0 "register_operand" "=r,?&q") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) + (clobber (reg:CC 17))] + "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "HI")]) -(define_insn "" - [(set (match_operand:HI 0 "register_operand" "=q,&q,?r") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm,qm")))] - "! 
(optimize_size || (int)ix86_cpu == (int)PROCESSOR_PENTIUMPRO)" - "* - { - rtx xops[2]; +(define_insn "*zero_extendqihi2_movzbw_and" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) + (clobber (reg:CC 17))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size" + "#" + [(set_attr "type" "imovx,alu1") + (set_attr "mode" "HI")]) - if ((TARGET_ZERO_EXTEND_WITH_AND || REGNO (operands[0]) == 0) - && REG_P (operands[1]) - && REGNO (operands[0]) == REGNO (operands[1])) - { - xops[0] = operands[0]; - xops[1] = GEN_INT (0xff); - output_asm_insn (AS2 (and%L0,%1,%k0), xops); - RET; - } - if (TARGET_ZERO_EXTEND_WITH_AND && QI_REG_P (operands[0])) - { - if(!reg_overlap_mentioned_p(operands[0],operands[1])) - { - output_asm_insn (AS2 (xor%L0,%k0,%k0), operands); - output_asm_insn (AS2 (mov%B0,%1,%b0), operands); - } - else - { - xops[0] = operands[0]; - xops[1] = GEN_INT (0xff); - output_asm_insn (AS2 (mov%B0,%1,%b0),operands); - output_asm_insn (AS2 (and%L0,%1,%k0), xops); - } - RET; - } - -#ifdef INTEL_SYNTAX - return AS2 (movzx,%1,%0); -#else - return AS2 (movz%B0%W0,%1,%0); -#endif -}") +(define_insn "*zero_extendqihi2_movzbw" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed" + "movz{bw|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "HI")]) +;; For the movzbw case strip only the clobber (define_split [(set (match_operand:HI 0 "register_operand" "") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))] - "reload_completed && QI_REG_P (operands[0]) && TARGET_ZERO_EXTEND_WITH_AND - && !reg_overlap_mentioned_p (operands[0], operands[1])" - [(set (match_dup 0) - (const_int 0)) - (set (strict_low_part (match_dup 2)) - (match_dup 1))] - "operands[2] = gen_rtx_REG (QImode, REGNO (operands[0]));") - + (zero_extend:HI (match_operand:QI 1 
"nonimmediate_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) + && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))]) +;; When source and destination does not overlap, clear destination +;; first and then do the movb (define_split [(set (match_operand:HI 0 "register_operand" "") - (zero_extend:HI (match_operand:QI 1 "memory_operand" "")))] - "reload_completed && QI_REG_P (operands[0]) && TARGET_ZERO_EXTEND_WITH_AND - && reg_overlap_mentioned_p (operands[0], operands[1])" - [(set (strict_low_part (match_dup 2)) - (match_dup 1)) - (set (match_dup 0) - (and:HI (match_dup 0) - (const_int 255)))] - "operands[2] = gen_rtx_REG (QImode, REGNO (operands[0]));") - + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(set (match_dup 0) (const_int 0)) + (set (strict_low_part (match_dup 2)) (match_dup 1))] + "operands[2] = gen_lowpart (QImode, operands[0]);") + +;; Rest is handled by single and. 
(define_split [(set (match_operand:HI 0 "register_operand" "") - (zero_extend:HI (match_operand:QI 1 "register_operand" "")))] - "reload_completed && TARGET_ZERO_EXTEND_WITH_AND" - [(set (match_dup 0) - (match_dup 2)) - (set (match_dup 0) - (and:HI (match_dup 0) - (const_int 255)))] - "if (GET_CODE (operands[1]) == SUBREG && SUBREG_WORD (operands[1]) == 0) - operands[1] = SUBREG_REG (operands[1]); - if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG - || REGNO (operands[0]) == REGNO (operands[1])) - FAIL; - operands[2] = gen_rtx_REG (HImode, REGNO (operands[1]));") + (zero_extend:HI (match_operand:QI 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255))) + (clobber (reg:CC 17))])] + "") (define_expand "zero_extendqisi2" - [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))] + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] "" "") -(define_insn "" - [(set (match_operand:SI 0 "register_operand" "=r") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] - "optimize_size || (int)ix86_cpu == (int)PROCESSOR_PENTIUMPRO" - "* return AS2 (movz%B0%L0,%1,%0);") - -(define_insn "" - [(set (match_operand:SI 0 "register_operand" "=q,&q,?r") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm,qm")))] - "! 
(optimize_size || (int)ix86_cpu == (int)PROCESSOR_PENTIUMPRO)" - "* - { - rtx xops[2]; - - if ((TARGET_ZERO_EXTEND_WITH_AND || REGNO (operands[0]) == 0) - && REG_P (operands[1]) - && REGNO (operands[0]) == REGNO (operands[1])) - { - xops[0] = operands[0]; - xops[1] = GEN_INT (0xff); - output_asm_insn (AS2 (and%L0,%1,%k0), xops); - RET; - } - if (TARGET_ZERO_EXTEND_WITH_AND && QI_REG_P (operands[0])) - { - if(!reg_overlap_mentioned_p (operands[0], operands[1])) - { - output_asm_insn (AS2 (xor%L0,%0,%0),operands); - output_asm_insn (AS2 (mov%B0,%1,%b0),operands); - } - else - { - xops[0] = operands[0]; - xops[1] = GEN_INT (0xff); - output_asm_insn (AS2 (mov%B0,%1,%b0), operands); - output_asm_insn (AS2 (and%L0,%1,%k0), xops); - } - RET; - } +(define_insn "*zero_extendqisi2_and" + [(set (match_operand:SI 0 "register_operand" "=r,?&q") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) + (clobber (reg:CC 17))] + "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) - if (TARGET_ZERO_EXTEND_WITH_AND && GET_CODE (operands[1]) == REG) - { - xops[0] = operands[0]; - xops[1] = GEN_INT (0xff); - operands[1] = gen_rtx_REG (SImode, REGNO (operands[1])); - output_asm_insn (AS2 (mov%L0,%1,%0), operands); - output_asm_insn (AS2 (and%L0,%1,%k0), xops); - RET; - } +(define_insn "*zero_extendqisi2_movzbw_and" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) + (clobber (reg:CC 17))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size" + "#" + [(set_attr "type" "imovx,alu1") + (set_attr "mode" "SI")]) -#ifdef INTEL_SYNTAX - return AS2 (movzx,%1,%0); -#else - return AS2 (movz%B0%L0,%1,%0); -#endif -}") +(define_insn "*zero_extendqisi2_movzbw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed" + 
"movz{bl|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) +;; For the movzbl case strip only the clobber (define_split [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))] - "reload_completed && QI_REG_P (operands[0]) && TARGET_ZERO_EXTEND_WITH_AND - && !reg_overlap_mentioned_p (operands[0], operands[1])" - [(set (match_dup 0) - (const_int 0)) - (set (strict_low_part (match_dup 2)) - (match_dup 1))] - "operands[2] = gen_rtx_REG (QImode, REGNO (operands[0]));") - + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) + && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" + [(set (match_dup 0) + (zero_extend:SI (match_dup 1)))]) +;; When source and destination does not overlap, clear destination +;; first and then do the movb (define_split [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:QI 1 "memory_operand" "")))] - "reload_completed && QI_REG_P (operands[0]) && TARGET_ZERO_EXTEND_WITH_AND - && reg_overlap_mentioned_p (operands[0], operands[1])" - [(set (strict_low_part (match_dup 2)) - (match_dup 1)) - (set (match_dup 0) - (and:SI (match_dup 0) - (const_int 255)))] - "operands[2] = gen_rtx_REG (QImode, REGNO (operands[0]));") - + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (ANY_QI_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM) + && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(set (match_dup 0) (const_int 0)) + (set (strict_low_part (match_dup 2)) (match_dup 1))] + "operands[2] = gen_lowpart (QImode, operands[0]);") + +;; Rest is handled by single and. 
(define_split [(set (match_operand:SI 0 "register_operand" "") - (zero_extend:SI (match_operand:QI 1 "register_operand" "")))] - "reload_completed && TARGET_ZERO_EXTEND_WITH_AND - && ! reg_overlap_mentioned_p (operands[0], operands[1])" - [(set (match_dup 0) - (match_dup 2)) - (set (match_dup 0) - (and:SI (match_dup 0) - (const_int 255)))] - "operands[2] = gen_rtx_REG (SImode, true_regnum (operands[1]));") - -(define_insn "zero_extendsidi2" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o") - (zero_extend:DI (match_operand:SI 1 "general_operand" "0,rm,r")))] + (zero_extend:SI (match_operand:QI 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255))) + (clobber (reg:CC 17))])] + "") + +;; %%% Kill me once multi-word ops are sane. +(define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))] "" - "#") + "if (!TARGET_64BIT) + { + emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1])); + DONE; + } + ") + +(define_insn "zero_extendsidi2_32" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "#" + [(set_attr "mode" "SI")]) + +(define_insn "zero_extendsidi2_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0")))] + "TARGET_64BIT" + "@ + mov\t{%k1, %k0|%k0, %k1} + #" + [(set_attr "type" "imovx,imov") + (set_attr "mode" "SI,DI")]) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (zero_extend:DI (match_dup 0)))] + "TARGET_64BIT" + [(set (match_dup 4) (const_int 0))] + "split_di (&operands[0], 1, &operands[3], &operands[4]);") (define_split [(set (match_operand:DI 0 "register_operand" "") 
- (zero_extend:DI (match_operand:SI 1 "register_operand" "")))] - "reload_completed && true_regnum (operands[0]) == true_regnum (operands[1])" + (zero_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" [(set (match_dup 4) (const_int 0))] "split_di (&operands[0], 1, &operands[3], &operands[4]);") (define_split [(set (match_operand:DI 0 "nonimmediate_operand" "") - (zero_extend:DI (match_operand:SI 1 "general_operand" "")))] - "reload_completed" + (zero_extend:DI (match_operand:SI 1 "general_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && reload_completed" [(set (match_dup 3) (match_dup 1)) (set (match_dup 4) (const_int 0))] "split_di (&operands[0], 1, &operands[3], &operands[4]);") + +(define_insn "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "r,m")))] + "TARGET_64BIT" + "@ + movz{wl|x}\t{%1, %k0|%k0, %1} + movz{wq|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI,DI")]) + +(define_insn "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "Q,m")))] + "TARGET_64BIT" + "@ + movz{bl|x}\t{%1, %k0|%k0, %1} + movz{bq|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI,DI")]) -;;- sign extension instructions +;; Sign extension instructions -(define_insn "extendsidi2" - [(set (match_operand:DI 0 "nonimmediate_operand" "=A,?r,?Ar,*o") - (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,*r"))) - (clobber (match_scratch:SI 2 "=X,X,X,&r"))] +(define_expand "extendsidi2" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC 17)) + (clobber (match_scratch:SI 2 ""))])] "" +{ + if (TARGET_64BIT) + { + emit_insn (gen_extendsidi2_rex64 
(operands[0], operands[1])); + DONE; + } +}) + +(define_insn "*extendsidi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o") + (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r"))) + (clobber (reg:CC 17)) + (clobber (match_scratch:SI 2 "=X,X,X,&r"))] + "!TARGET_64BIT" "#") +(define_insn "extendsidi2_rex64" + [(set (match_operand:DI 0 "register_operand" "=*a,r") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))] + "TARGET_64BIT" + "@ + {cltq|cdqe} + movs{lq|x}\t{%1,%0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI") + (set_attr "prefix_0f" "0") + (set_attr "modrm" "0,1")]) + +(define_insn "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "TARGET_64BIT" + "movs{wq|x}\t{%1,%0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +(define_insn "extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "TARGET_64BIT" + "movs{bq|x}\t{%1,%0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + ;; Extend to memory case when source register does die. 
(define_split [(set (match_operand:DI 0 "memory_operand" "") (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC 17)) (clobber (match_operand:SI 2 "register_operand" ""))] - "(flow2_completed + "(reload_completed && dead_or_set_p (insn, operands[1]) && !reg_mentioned_p (operands[1], operands[0]))" [(set (match_dup 3) (match_dup 1)) - (set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) + (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31))) + (clobber (reg:CC 17))]) (set (match_dup 4) (match_dup 1))] "split_di (&operands[0], 1, &operands[3], &operands[4]);") @@ -2065,10 +3785,10 @@ (define_split [(set (match_operand:DI 0 "memory_operand" "") (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC 17)) (clobber (match_operand:SI 2 "register_operand" ""))] - "flow2_completed" + "reload_completed" [(const_int 0)] - " { split_di (&operands[0], 1, &operands[3], &operands[4]); @@ -2077,28 +3797,28 @@ /* Generate a cltd if possible and doing so it profitable. */ if (true_regnum (operands[1]) == 0 && true_regnum (operands[2]) == 1 - && (optimize_size || !TARGET_PENTIUM)) + && (optimize_size || TARGET_USE_CLTD)) { - emit_insn (gen_ashrsi3_31 (operands[2], operands[1])); + emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31))); } else { emit_move_insn (operands[2], operands[1]); - emit_insn (gen_ashrsi3_31 (operands[2], operands[2])); + emit_insn (gen_ashrsi3_31 (operands[2], operands[2], GEN_INT (31))); } emit_move_insn (operands[4], operands[2]); DONE; -}") +}) ;; Extend to register case. Optimize case where source and destination ;; registers match and cases where we can use cltd. 
(define_split [(set (match_operand:DI 0 "register_operand" "") (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC 17)) (clobber (match_scratch:SI 2 ""))] "reload_completed" [(const_int 0)] - " { split_di (&operands[0], 1, &operands[3], &operands[4]); @@ -2107,1600 +3827,3052 @@ /* Generate a cltd if possible and doing so it profitable. */ if (true_regnum (operands[3]) == 0 - && (optimize_size || !TARGET_PENTIUM)) + && (optimize_size || TARGET_USE_CLTD)) { - emit_insn (gen_ashrsi3_31 (operands[4], operands[3])); + emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31))); DONE; } if (true_regnum (operands[4]) != true_regnum (operands[1])) emit_move_insn (operands[4], operands[1]); - emit_insn (gen_ashrsi3_31 (operands[4], operands[4])); + emit_insn (gen_ashrsi3_31 (operands[4], operands[4], GEN_INT (31))); DONE; -}") - -;; Note that the i386 programmers' manual says that the opcodes -;; are named movsx..., but the assembler on Unix does not accept that. -;; We use what the Unix assembler expects. +}) (define_insn "extendhisi2" - [(set (match_operand:SI 0 "register_operand" "=r") - (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + [(set (match_operand:SI 0 "register_operand" "=*a,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))] "" - "* { - if (REGNO (operands[0]) == 0 - && REG_P (operands[1]) && REGNO (operands[1]) == 0 - && (optimize_size || ix86_cpu != PROCESSOR_K6)) -#ifdef INTEL_SYNTAX - return \"cwde\"; -#else - return \"cwtl\"; -#endif - -#ifdef INTEL_SYNTAX - return AS2 (movsx,%1,%0); -#else - return AS2 (movs%W0%L0,%1,%0); -#endif -}") + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cwtl|cwde}"; + default: + return "movs{wl|x}\t{%1,%0|%0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "SI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. 
+ (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +(define_insn "*extendhisi2_zext" + [(set (match_operand:DI 0 "register_operand" "=*a,r") + (zero_extend:DI + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))] + "TARGET_64BIT" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cwtl|cwde}"; + default: + return "movs{wl|x}\t{%1,%k0|%k0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "SI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) (define_insn "extendqihi2" - [(set (match_operand:HI 0 "register_operand" "=r") - (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + [(set (match_operand:HI 0 "register_operand" "=*a,r") + (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))] "" - "* { - if (REGNO (operands[0]) == 0 - && REG_P (operands[1]) && REGNO (operands[1]) == 0 - && (optimize_size || ix86_cpu != PROCESSOR_K6)) - return \"cbtw\"; - -#ifdef INTEL_SYNTAX - return AS2 (movsx,%1,%0); -#else - return AS2 (movs%B0%W0,%1,%0); -#endif -}") + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cbtw|cbw}"; + default: + return "movs{bw|x}\t{%1,%0|%0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "HI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. 
+ (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) (define_insn "extendqisi2" [(set (match_operand:SI 0 "register_operand" "=r") (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] "" - "* -{ -#ifdef INTEL_SYNTAX - return AS2 (movsx,%1,%0); -#else - return AS2 (movs%B0%L0,%1,%0); -#endif -}") - + "movs{bl|x}\t{%1,%0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*extendqisi2_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))] + "TARGET_64BIT" + "movs{bl|x}\t{%1,%k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) -;; Truncation of long long -> 32 bit +;; Conversions between float and double. -(define_expand "truncdisi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m") - (truncate:SI (match_operand:DI 1 "nonimmediate_operand" "ro,r")))] - "" - " -{ - /* Don't generate memory->memory moves, go through a register */ - if (TARGET_MOVE - && (reload_in_progress | reload_completed) == 0 - && GET_CODE (operands[0]) == MEM - && GET_CODE (operands[1]) == MEM) - { - rtx target = gen_reg_rtx (SImode); - emit_insn (gen_truncdisi2 (target, operands[1])); - emit_move_insn (operands[0], target); - DONE; - } -}") +;; These are all no-ops in the model used for the 80387. So just +;; emit moves. -(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m") - (truncate:SI (match_operand:DI 1 "nonimmediate_operand" "ro,r")))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) || (GET_CODE (operands[1]) != MEM)" - "* -{ - rtx low[2], high[2], xops[2]; +;; %%% Kill these when call knows how to work out a DFmode push earlier. 
+(define_insn "*dummy_extendsfdf2" + [(set (match_operand:DF 0 "push_operand" "=<") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY")))] + "0" + "#") - split_di (&operands[1], 1, low, high); - xops[0] = operands[0]; - xops[1] = low[0]; - if (!rtx_equal_p (xops[0], xops[1])) - output_asm_insn (AS2 (mov%L0,%1,%0), xops); +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (float_extend:DF (match_operand:SF 1 "register_operand" "")))] + "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (set (mem:DF (reg:SI 7)) (float_extend:DF (match_dup 1)))]) - RET; -}") +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (float_extend:DF (match_operand:SF 1 "register_operand" "")))] + "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -8))) + (set (mem:DF (reg:DI 7)) (float_extend:DF (match_dup 1)))]) + +(define_insn "*dummy_extendsfxf2" + [(set (match_operand:XF 0 "push_operand" "=<") + (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "f")))] + "0" + "#") -(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m") - (truncate:SI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "ro,r") - (const_int 32))))] - "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) || (GET_CODE (operands[1]) != MEM)" - "* -{ - rtx low[2], high[2], xops[2]; +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (float_extend:XF (match_operand:SF 1 "register_operand" "")))] + "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) + (set (mem:XF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) + +(define_insn "*dummy_extendsftf2" + [(set (match_operand:TF 0 "push_operand" "=<") + (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "f")))] + "0" + "#") - split_di (&operands[1], 1, low, high); - xops[0] = operands[0]; - xops[1] = high[0]; - if (!rtx_equal_p 
(xops[0], xops[1])) - output_asm_insn (AS2 (mov%L0,%1,%0), xops); +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (float_extend:TF (match_operand:SF 1 "register_operand" "")))] + "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:TF (reg:SI 7)) (float_extend:TF (match_dup 1)))]) - RET; -}") +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (float_extend:TF (match_operand:SF 1 "register_operand" "")))] + "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) + (set (mem:DF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) + +(define_insn "*dummy_extenddfxf2" + [(set (match_operand:XF 0 "push_operand" "=<") + (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "f")))] + "0" + "#") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (float_extend:XF (match_operand:DF 1 "register_operand" "")))] + "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) + (set (mem:DF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) + +(define_insn "*dummy_extenddftf2" + [(set (match_operand:TF 0 "push_operand" "=<") + (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "f")))] + "0" + "#") +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (float_extend:TF (match_operand:DF 1 "register_operand" "")))] + "!TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:TF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) - -;; Conversions between float and double. 
+(define_split + [(set (match_operand:TF 0 "push_operand" "") + (float_extend:TF (match_operand:DF 1 "register_operand" "")))] + "TARGET_64BIT && FP_REGNO_P (REGNO (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) + (set (mem:TF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) (define_expand "extendsfdf2" - [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2)) - (clobber (match_dup 3))])] - "TARGET_80387" - " + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE2" { if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (SFmode, operands[1]); +}) - operands[2] = assign_386_stack_local (SFmode, 0); - operands[3] = assign_386_stack_local (DFmode, 0); -}") - -(define_insn "" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,!f,!*r") - (float_extend:DF - (match_operand:SF 1 "nonimmediate_operand" "fm,f,*r,f"))) - (clobber (match_operand:SF 2 "memory_operand" "m,m,m,m")) - (clobber (match_operand:DF 3 "memory_operand" "m,m,m,o"))] - "TARGET_80387 && (GET_CODE (operands[0]) != MEM - || GET_CODE (operands[1]) != MEM)" - "* +(define_insn "*extendsfdf2_1" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,mf#Y,Y#f") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm#Y,f#Y,mY#f")))] + "(TARGET_80387 || TARGET_SSE2) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { - output_float_extend (insn, operands); - return \"\"; -}" - [(set_attr "type" "fld,fpop,fld,fpop")]) + switch (which_alternative) + { + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; -(define_split - [(set (match_operand:DF 0 "register_operand" "") 
- (float_extend:DF (match_operand:SF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" "")) - (clobber (match_operand:DF 3 "memory_operand" ""))] - "TARGET_80387 && reload_completed && NON_STACK_REG_P (operands[1])" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float_extend:DF (match_dup 2)))] - "") + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (float_extend:DF (match_operand:SF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" "")) - (clobber (match_operand:DF 3 "memory_operand" ""))] - "TARGET_80387 && reload_completed && NON_STACK_REG_P (operands[0])" - [(set (match_dup 3) - (float_extend:DF (match_dup 1))) - (set (match_dup 0) - (match_dup 3))] - "") + else + return "fst%z0\t%y0"; + case 2: + return "cvtss2sd\t{%1, %0|%0, %1}"; -(define_split - [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" "")) - (clobber (match_operand:DF 3 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float_extend:DF (match_dup 1)))] - "") + default: + abort (); + } +} + [(set_attr "type" "fmov,fmov,sse") + (set_attr "mode" "SF,XF,DF")]) + +(define_insn "*extendsfdf2_1_sse_only" + [(set (match_operand:DF 0 "register_operand" "=Y") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "mY")))] + "!TARGET_80387 && TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "cvtss2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "DF")]) -(define_insn "" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "TARGET_80387 && (GET_CODE (operands[0]) != MEM - || GET_CODE (operands[1]) != MEM)" - "* +(define_expand "extendsfxf2" + 
[(set (match_operand:XF 0 "nonimmediate_operand" "") + (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "")))] + "!TARGET_64BIT && TARGET_80387" { - output_float_extend (insn, operands); - return \"\"; -}" - [(set_attr "type" "fld,fpop")]) + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (SFmode, operands[1]); +}) -(define_expand "extenddfxf2" - [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF - (match_operand:DF 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2)) - (clobber (match_dup 3))])] +(define_insn "*extendsfxf2_1" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") + (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] + "!TARGET_64BIT && TARGET_80387 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; + + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\n\tfld%z0\t%y0"; + else + return "fstp%z0\t%y0"; + + default: + abort (); + } +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF,XF")]) + +(define_expand "extendsftf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "")))] "TARGET_80387" - " { if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (DFmode, operands[1]); - - operands[2] = assign_386_stack_local (DFmode, 0); - operands[3] = assign_386_stack_local (XFmode, 0); -}") + operands[1] = force_reg (SFmode, operands[1]); +}) -(define_insn "" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,!f,!*r") - (float_extend:XF - (match_operand:DF 1 "nonimmediate_operand" "fm,f,*r,f"))) - (clobber (match_operand:DF 2 "memory_operand" "m,m,o,m")) - (clobber (match_operand:XF 3 "memory_operand" "m,m,m,o"))] - "TARGET_80387 && (GET_CODE (operands[0]) != MEM - || GET_CODE (operands[1]) != MEM)" - "* +(define_insn "*extendsftf2_1" + [(set (match_operand:TF 0 "nonimmediate_operand" "=f,m") + (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] + "TARGET_80387 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { - output_float_extend (insn, operands); - return \"\"; -}" - [(set_attr "type" "fld,fpop,fld,fpop")]) + switch (which_alternative) + { + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (float_extend:XF (match_operand:DF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" "")) - (clobber (match_operand:XF 3 "memory_operand" ""))] - "TARGET_80387 && reload_completed && NON_STACK_REG_P (operands[1])" - [(set (match_dup 2) - (match_dup 1)) - (set 
(match_dup 0) - (float_extend:XF (match_dup 2)))] - "") + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\n\tfld%z0\t%y0"; + else + return "fstp%z0\t%y0"; -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (float_extend:XF (match_operand:DF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" "")) - (clobber (match_operand:XF 3 "memory_operand" ""))] - "TARGET_80387 && reload_completed && NON_STACK_REG_P (operands[0])" - [(set (match_dup 3) - (float_extend:XF (match_dup 1))) - (set (match_dup 0) - (match_dup 3))] - "") + default: + abort (); + } +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF,XF")]) -(define_split +(define_expand "extenddfxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" "")) - (clobber (match_operand:XF 3 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float_extend:XF (match_dup 1)))] - "") + (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "")))] + "!TARGET_64BIT && TARGET_80387" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + operands[1] = force_reg (DFmode, operands[1]); +}) -(define_insn "" +(define_insn "*extenddfxf2_1" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] - "TARGET_80387 && (GET_CODE (operands[0]) != MEM - || GET_CODE (operands[1]) != MEM)" - "* + (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] + "!TARGET_64BIT && TARGET_80387 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { - output_float_extend (insn, operands); - return \"\"; -}" - [(set_attr "type" "fld,fpop")]) + switch (which_alternative) + { + case 0: + 
if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; -(define_expand "extendsfxf2" - [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF - (match_operand:SF 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2)) - (clobber (match_dup 3))])] + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\n\tfld%z0\t%y0"; + else + return "fstp%z0\t%y0"; + + default: + abort (); + } +} + [(set_attr "type" "fmov") + (set_attr "mode" "DF,XF")]) + +(define_expand "extenddftf2" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "")))] "TARGET_80387" - " { if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (SFmode, operands[1]); - - operands[2] = assign_386_stack_local (SFmode, 0); - operands[3] = assign_386_stack_local (XFmode, 0); -}") + operands[1] = force_reg (DFmode, operands[1]); +}) -(define_insn "" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,!f,!*r") - (float_extend:XF - (match_operand:SF 1 "nonimmediate_operand" "fm,f,*r,f"))) - (clobber (match_operand:SF 2 "memory_operand" "m,m,m,m")) - (clobber (match_operand:XF 3 "memory_operand" "m,m,m,o"))] - "TARGET_80387 && (GET_CODE (operands[0]) != MEM - || GET_CODE (operands[1]) != MEM)" - "* +(define_insn "*extenddftf2_1" + [(set (match_operand:TF 0 "nonimmediate_operand" "=f,m") + (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] + "TARGET_80387 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { - output_float_extend (insn, operands); - return \"\"; -}" - [(set_attr "type" "fld,fpop,fld,fpop")]) - -(define_split - [(set (match_operand:XF 0 
"register_operand" "") - (float_extend:XF (match_operand:SF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" "")) - (clobber (match_operand:XF 3 "memory_operand" ""))] - "TARGET_80387 && reload_completed && NON_STACK_REG_P (operands[1])" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float_extend:XF (match_dup 2)))] - "") + switch (which_alternative) + { + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp\t%y0"; + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (float_extend:XF (match_operand:SF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" "")) - (clobber (match_operand:XF 3 "memory_operand" ""))] - "TARGET_80387 && reload_completed && NON_STACK_REG_P (operands[0])" - [(set (match_dup 3) - (float_extend:XF (match_dup 1))) - (set (match_dup 0) - (match_dup 3))] - "") + case 1: + /* There is no non-popping store to memory for XFmode. So if + we need one, follow the store with a load. */ + if (! 
find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0\n\tfld%z0\t%y0"; + else + return "fstp%z0\t%y0"; -(define_split - [(set (match_operand:XF 0 "nonimmediate_operand" "") - (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" "")) - (clobber (match_operand:XF 3 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float_extend:XF (match_dup 1)))] - "") + default: + abort (); + } +} + [(set_attr "type" "fmov") + (set_attr "mode" "DF,XF")]) -(define_insn "" - [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") - (float_extend:XF - (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "TARGET_80387 && (GET_CODE (operands[0]) != MEM - || GET_CODE (operands[1]) != MEM)" - "* -{ - output_float_extend (insn, operands); - return \"\"; -}" - [(set_attr "type" "fld,fpop")]) +;; %%% This seems bad bad news. +;; This cannot output into an f-reg because there is no way to be sure +;; of truncating in that case. Otherwise this is just like a simple move +;; insn. So we pretend we can output to a reg in order to get better +;; register preferencing, but we really use a stack slot. 
(define_expand "truncdfsf2" [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") (float_truncate:SF (match_operand:DF 1 "register_operand" ""))) (clobber (match_dup 2))])] - "TARGET_80387" + "TARGET_80387 || TARGET_SSE2" " + if (TARGET_80387) + operands[2] = assign_386_stack_local (SFmode, 0); + else + { + emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1])); + DONE; + } +") + +(define_insn "*truncdfsf2_1" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf") + (float_truncate:SF + (match_operand:DF 1 "register_operand" "f,f,f,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] + "TARGET_80387 && !TARGET_SSE2" { - operands[2] = (rtx) assign_386_stack_local (SFmode, 0); -}") + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort (); + } +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "mode" "SF,SF,SF,SF")]) -(define_insn "" - [(set (match_operand:SF 0 "nonimmediate_operand" "=f,m,!*r") +(define_insn "*truncdfsf2_1_sse" + [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,Y") (float_truncate:SF - (match_operand:DF 1 "register_operand" "0,f,f"))) - (clobber (match_operand:SF 2 "memory_operand" "m,m,m"))] - "TARGET_80387" - "* + (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] + "TARGET_80387 && TARGET_SSE2" { - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; - rtx xops[1]; + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + case 4: + return "cvtsd2ss\t{%1, %0|%0, %1}"; + default: + abort (); + } +} + [(set_attr "type" "fmov,multi,multi,multi,sse") + (set_attr "mode" "SF,SF,SF,SF,DF")]) - xops[0] = GET_CODE (operands[0]) == MEM ? 
operands[0] : operands[2]; +(define_insn "*truncdfsf2_2" + [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] + "TARGET_80387 && TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + return "cvtsd2ss\t{%1, %0|%0, %1}"; + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort (); + } +} + [(set_attr "type" "sse,fmov") + (set_attr "mode" "DF,SF")]) - if (stack_top_dies || STACK_REG_P (operands[0])) - output_asm_insn (AS1 (fstp%z0,%0), xops); +(define_insn "truncdfsf2_3" + [(set (match_operand:SF 0 "memory_operand" "=m") + (float_truncate:SF + (match_operand:DF 1 "register_operand" "f")))] + "TARGET_80387" +{ + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; else - output_asm_insn (AS1 (fst%z0,%0), xops); - - if (STACK_REG_P (operands[0])) - return AS1 (fld%z2,%2); - else if (NON_STACK_REG_P (operands[0])) - return AS2 (mov%L0,%2,%0); + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) - return \"\"; -}" - [(set_attr "type" "fpop")]) +(define_insn "truncdfsf2_sse_only" + [(set (match_operand:SF 0 "register_operand" "=Y") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY")))] + "!TARGET_80387 && TARGET_SSE2" + "cvtsd2ss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "DF")]) (define_split - [(set (match_operand:SF 0 "register_operand" "") - (float_truncate:SF (match_operand:DF 1 "register_operand" ""))) + [(set (match_operand:SF 0 "memory_operand" "") + (float_truncate:SF + (match_operand:DF 1 "register_operand" ""))) (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (float_truncate:SF (match_dup 1))) - (set (match_dup 0) - (match_dup 2))] + "TARGET_80387" + 
[(set (match_dup 0) (float_truncate:SF (match_dup 1)))] "") (define_split - [(set (match_operand:SF 0 "memory_operand" "") - (float_truncate:SF (match_operand:DF 1 "register_operand" ""))) + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" ""))) + (clobber (match_operand 2 "" ""))] + "TARGET_80387 && reload_completed + && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])" + [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] + "") + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "register_operand" ""))) (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float_truncate:SF (match_dup 1)))] + "TARGET_80387 && reload_completed + && FP_REG_P (operands[1])" + [(set (match_dup 2) (float_truncate:SF (match_dup 1))) + (set (match_dup 0) (match_dup 2))] "") -;; This cannot output into an f-reg because there is no way to be sure -;; of truncating in that case. 
+(define_expand "truncxfsf2" + [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") + (float_truncate:SF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_dup 2))])] + "!TARGET_64BIT && TARGET_80387" + "operands[2] = assign_386_stack_local (SFmode, 0);") -(define_insn "" - [(set (match_operand:SF 0 "memory_operand" "=m") - (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] - "TARGET_80387" - "* +(define_insn "*truncxfsf2_1" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf") + (float_truncate:SF + (match_operand:XF 1 "register_operand" "f,f,f,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] + "!TARGET_64BIT && TARGET_80387" { - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort(); + } +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "mode" "SF")]) - if (stack_top_dies) - return AS1 (fstp%z0,%0); +(define_insn "*truncxfsf2_2" + [(set (match_operand:SF 0 "memory_operand" "=m") + (float_truncate:SF + (match_operand:XF 1 "register_operand" "f")))] + "!TARGET_64BIT && TARGET_80387" +{ + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; else - return AS1 (fst%z0,%0); -}" - [(set_attr "type" "fpop")]) + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) -(define_expand "truncxfsf2" +(define_split + [(set (match_operand:SF 0 "memory_operand" "") + (float_truncate:SF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:SF 2 "memory_operand" ""))] + "TARGET_80387" + [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] + "") + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:XF 1 "register_operand" ""))) + (clobber (match_operand:SF 2 
"memory_operand" ""))] + "TARGET_80387 && reload_completed" + [(set (match_dup 2) (float_truncate:SF (match_dup 1))) + (set (match_dup 0) (match_dup 2))] + "") + +(define_expand "trunctfsf2" [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) + (match_operand:TF 1 "register_operand" ""))) (clobber (match_dup 2))])] "TARGET_80387" - " -{ - operands[2] = (rtx) assign_386_stack_local (SFmode, 0); -}") + "operands[2] = assign_386_stack_local (SFmode, 0);") -(define_insn "" - [(set (match_operand:SF 0 "nonimmediate_operand" "=f,m,!*r") +(define_insn "*trunctfsf2_1" + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf") (float_truncate:SF - (match_operand:XF 1 "register_operand" "0,f,f"))) - (clobber (match_operand:SF 2 "memory_operand" "m,m,m"))] + (match_operand:TF 1 "register_operand" "f,f,f,f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] "TARGET_80387" - "* { - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; - rtx xops[1]; - - xops[0] = GET_CODE (operands[0]) == MEM ? 
operands[0] : operands[2]; + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort(); + } +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "mode" "SF")]) - if (stack_top_dies || STACK_REG_P (operands[0])) - output_asm_insn (AS1 (fstp%z0,%0), xops); +(define_insn "*trunctfsf2_2" + [(set (match_operand:SF 0 "memory_operand" "=m") + (float_truncate:SF + (match_operand:TF 1 "register_operand" "f")))] + "TARGET_80387" +{ + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; else - output_asm_insn (AS1 (fst%z0,%0), xops); - - if (STACK_REG_P (operands[0])) - return AS1 (fld%z2,%2); - else if (NON_STACK_REG_P (operands[0])) - return AS2 (mov%L0,%2,%0); - - return \"\"; -}" - [(set_attr "type" "fpop")]) + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "SF")]) (define_split - [(set (match_operand:SF 0 "register_operand" "") - (float_truncate:SF (match_operand:XF 1 "register_operand" ""))) + [(set (match_operand:SF 0 "memory_operand" "") + (float_truncate:SF + (match_operand:TF 1 "register_operand" ""))) (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (float_truncate:SF (match_dup 1))) - (set (match_dup 0) - (match_dup 2))] + "TARGET_80387" + [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] "") (define_split - [(set (match_operand:SF 0 "memory_operand" "") - (float_truncate:SF (match_operand:XF 1 "register_operand" ""))) + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:TF 1 "register_operand" ""))) (clobber (match_operand:SF 2 "memory_operand" ""))] "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float_truncate:SF (match_dup 1)))] + [(set (match_dup 2) (float_truncate:SF (match_dup 1))) + (set (match_dup 0) (match_dup 2))] "") -(define_insn "" - [(set 
(match_operand:SF 0 "memory_operand" "=m") - (float_truncate:SF (match_operand:XF 1 "register_operand" "f")))] - "TARGET_80387" - "* -{ - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; - - if (stack_top_dies) - return AS1 (fstp%z0,%0); - else - return AS1 (fst%z0,%0); -}" - [(set_attr "type" "fpop")]) (define_expand "truncxfdf2" [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") (float_truncate:DF (match_operand:XF 1 "register_operand" ""))) (clobber (match_dup 2))])] - "TARGET_80387" - " -{ - operands[2] = (rtx) assign_386_stack_local (DFmode, 0); -}") + "!TARGET_64BIT && TARGET_80387" + "operands[2] = assign_386_stack_local (DFmode, 0);") -(define_insn "" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,!*r") +(define_insn "*truncxfdf2_1" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#rY,?r#fY,?Y#rf") (float_truncate:DF - (match_operand:XF 1 "register_operand" "0,f,f"))) - (clobber (match_operand:DF 2 "memory_operand" "m,m,o"))] - "TARGET_80387" - "* + (match_operand:XF 1 "register_operand" "f,f,f,f"))) + (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] + "!TARGET_64BIT && TARGET_80387" { - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; - rtx xops[2]; - - xops[0] = GET_CODE (operands[0]) == MEM ? 
operands[0] : operands[2]; - - if (stack_top_dies || STACK_REG_P (operands[0])) - output_asm_insn (AS1 (fstp%z0,%0), xops); - else - output_asm_insn (AS1 (fst%z0,%0), xops); - - if (STACK_REG_P (operands[0])) - return AS1 (fld%z2,%2); - else if (NON_STACK_REG_P (operands[0])) + switch (which_alternative) { - xops[0] = operands[0]; - xops[1] = operands[2]; - output_asm_insn (output_move_double (xops), xops); + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort(); } + abort (); +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "mode" "DF")]) - return \"\"; -}" - [(set_attr "type" "fpop")]) +(define_insn "*truncxfdf2_2" + [(set (match_operand:DF 0 "memory_operand" "=m") + (float_truncate:DF + (match_operand:XF 1 "register_operand" "f")))] + "!TARGET_64BIT && TARGET_80387" +{ + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "DF")]) (define_split - [(set (match_operand:DF 0 "register_operand" "") - (float_truncate:DF (match_operand:XF 1 "register_operand" ""))) + [(set (match_operand:DF 0 "memory_operand" "") + (float_truncate:DF + (match_operand:XF 1 "register_operand" ""))) (clobber (match_operand:DF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (float_truncate:DF (match_dup 1))) - (set (match_dup 0) - (match_dup 2))] + "TARGET_80387" + [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] "") (define_split - [(set (match_operand:DF 0 "memory_operand" "") - (float_truncate:DF (match_operand:XF 1 "register_operand" ""))) + [(set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF + (match_operand:XF 1 "register_operand" ""))) (clobber (match_operand:DF 2 "memory_operand" ""))] "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float_truncate:DF (match_dup 1)))] + [(set (match_dup 2) 
(float_truncate:DF (match_dup 1))) + (set (match_dup 0) (match_dup 2))] "") -(define_insn "" - [(set (match_operand:DF 0 "memory_operand" "=m") - (float_truncate:DF (match_operand:XF 1 "register_operand" "f")))] +(define_expand "trunctfdf2" + [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") + (float_truncate:DF + (match_operand:TF 1 "register_operand" ""))) + (clobber (match_dup 2))])] "TARGET_80387" - "* -{ - int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; + "operands[2] = assign_386_stack_local (DFmode, 0);") - if (stack_top_dies) - return AS1 (fstp%z0,%0); - else - return AS1 (fst%z0,%0); -}" - [(set_attr "type" "fpop")]) - -;; Conversions between floating point and fix point. - -(define_expand "fix_truncsfsi2" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (fix:SF (match_operand:SF 1 "register_operand" "")))) - (clobber (match_dup 2)) - (clobber (match_dup 3)) - (clobber (match_dup 4)) - (clobber (match_scratch:HI 5 ""))])] +(define_insn "*trunctfdf2_1" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#rY,?r#fY,?Y#rf") + (float_truncate:DF + (match_operand:TF 1 "register_operand" "f,f,f,f"))) + (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] "TARGET_80387" - " { - operands[2] = (rtx) assign_386_stack_local (HImode, 0); - operands[3] = (rtx) assign_386_stack_local (HImode, 1); - operands[4] = (rtx) assign_386_stack_local (SImode, 0); -}") - -(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=m,!&r") - (fix:SI (fix:SF (match_operand:SF 1 "register_operand" "f,f")))) - (clobber (match_operand:HI 2 "memory_operand" "m,m")) - (clobber (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:SI 4 "memory_operand" "m,m")) - (clobber (match_scratch:HI 5 "=&r,&r"))] - "TARGET_80387" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fpop")]) + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO 
(operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort(); + } + abort (); +} + [(set_attr "type" "fmov,multi,multi,multi") + (set_attr "mode" "DF")]) -(define_expand "fix_truncsfdi2" - [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "")))) - (clobber (match_dup 1)) - (clobber (match_dup 2)) - (clobber (match_dup 3)) - (clobber (match_dup 4)) - (clobber (match_scratch:HI 5 ""))])] + (define_insn "*trunctfdf2_2" + [(set (match_operand:DF 0 "memory_operand" "=m") + (float_truncate:DF + (match_operand:TF 1 "register_operand" "f")))] "TARGET_80387" - " { - operands[1] = copy_to_mode_reg (SFmode, operands[1]); - operands[2] = (rtx) assign_386_stack_local (HImode, 0); - operands[3] = (rtx) assign_386_stack_local (HImode, 1); - operands[4] = (rtx) assign_386_stack_local (DImode, 0); -}") + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "DF")]) -(define_insn "" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,!&r") - (fix:DI (fix:SF (match_operand:SF 1 "register_operand" "f,f")))) - (clobber (match_dup 1)) - (clobber (match_operand:HI 2 "memory_operand" "m,m")) - (clobber (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:DI 4 "memory_operand" "m,o")) - (clobber (match_scratch:HI 5 "=&r,&r"))] +(define_split + [(set (match_operand:DF 0 "memory_operand" "") + (float_truncate:DF + (match_operand:TF 1 "register_operand" ""))) + (clobber (match_operand:DF 2 "memory_operand" ""))] "TARGET_80387" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fpop")]) + [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] + "") -(define_expand "fix_truncdfsi2" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (fix:DF (match_operand:DF 1 "register_operand" "")))) - (clobber (match_dup 2)) - (clobber 
(match_dup 3)) - (clobber (match_dup 4)) - (clobber (match_scratch:HI 5 ""))])] - "TARGET_80387" - " -{ - operands[2] = (rtx) assign_386_stack_local (HImode, 0); - operands[3] = (rtx) assign_386_stack_local (HImode, 1); - operands[4] = (rtx) assign_386_stack_local (SImode, 0); -}") +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (float_truncate:DF + (match_operand:TF 1 "register_operand" ""))) + (clobber (match_operand:DF 2 "memory_operand" ""))] + "TARGET_80387 && reload_completed" + [(set (match_dup 2) (float_truncate:DF (match_dup 1))) + (set (match_dup 0) (match_dup 2))] + "") -(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=m,!&r") - (fix:SI (fix:DF (match_operand:DF 1 "register_operand" "f,f")))) - (clobber (match_operand:HI 2 "memory_operand" "m,m")) - (clobber (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:SI 4 "memory_operand" "m,m")) - (clobber (match_scratch:HI 5 "=&r,&r"))] - "TARGET_80387" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fpop")]) + +;; %%% Break up all these bad boys. -(define_expand "fix_truncdfdi2" - [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "")))) - (clobber (match_dup 1)) - (clobber (match_dup 2)) - (clobber (match_dup 3)) - (clobber (match_dup 4)) - (clobber (match_scratch:HI 5 ""))])] - "TARGET_80387" - " -{ - operands[1] = copy_to_mode_reg (DFmode, operands[1]); - operands[2] = (rtx) assign_386_stack_local (HImode, 0); - operands[3] = (rtx) assign_386_stack_local (HImode, 1); - operands[4] = (rtx) assign_386_stack_local (DImode, 0); -}") +;; Signed conversion to DImode. 
-(define_insn "" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,!&r") - (fix:DI (fix:DF (match_operand:DF 1 "register_operand" "f,f")))) - (clobber (match_dup 1)) - (clobber (match_operand:HI 2 "memory_operand" "m,m")) - (clobber (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:DI 4 "memory_operand" "m,o")) - (clobber (match_scratch:HI 5 "=&r,&r"))] - "TARGET_80387" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fpop")]) +(define_expand "fix_truncxfdi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:XF 1 "register_operand" "")))] + "!TARGET_64BIT && TARGET_80387" + "") -(define_expand "fix_truncxfsi2" - [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (fix:XF (match_operand:XF 1 "register_operand" "")))) - (clobber (match_dup 2)) - (clobber (match_dup 3)) - (clobber (match_dup 4)) - (clobber (match_scratch:HI 5 ""))])] +(define_expand "fix_trunctfdi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:TF 1 "register_operand" "")))] "TARGET_80387" - " -{ - operands[2] = (rtx) assign_386_stack_local (HImode, 0); - operands[3] = (rtx) assign_386_stack_local (HImode, 1); - operands[4] = (rtx) assign_386_stack_local (SImode, 0); -}") + "") -(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=m,!&r") - (fix:SI (fix:XF (match_operand:XF 1 "register_operand" "f,f")))) - (clobber (match_operand:HI 2 "memory_operand" "m,m")) - (clobber (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:SI 4 "memory_operand" "m,m")) - (clobber (match_scratch:HI 5 "=&r,&r"))] - "TARGET_80387" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fpop")]) +(define_expand "fix_truncdfdi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:DF 1 "register_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_64BIT)" +{ + if (TARGET_64BIT && TARGET_SSE2) + { + rtx out = REG_P 
(operands[0]) ? operands[0] : gen_reg_rtx (DImode); + emit_insn (gen_fix_truncdfdi_sse (out, operands[1])); + if (out != operands[0]) + emit_move_insn (operands[0], out); + DONE; + } +}) -(define_expand "fix_truncxfdi2" - [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (fix:XF (match_operand:XF 1 "register_operand" "")))) - (clobber (match_dup 1)) - (clobber (match_dup 2)) - (clobber (match_dup 3)) - (clobber (match_dup 4)) - (clobber (match_scratch:HI 5 ""))])] - "TARGET_80387" - " +(define_expand "fix_truncsfdi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (fix:DI (match_operand:SF 1 "register_operand" "")))] + "TARGET_80387 || (TARGET_SSE && TARGET_64BIT)" { - operands[1] = copy_to_mode_reg (XFmode, operands[1]); - operands[2] = (rtx) assign_386_stack_local (HImode, 0); - operands[3] = (rtx) assign_386_stack_local (HImode, 1); - operands[4] = (rtx) assign_386_stack_local (DImode, 0); -}") + if (TARGET_SSE && TARGET_64BIT) + { + rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode); + emit_insn (gen_fix_truncsfdi_sse (out, operands[1])); + if (out != operands[0]) + emit_move_insn (operands[0], out); + DONE; + } +}) + +;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description +;; of the machinery. 
+(define_insn_and_split "*fix_truncdi_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (fix:DI (match_operand 1 "register_operand" "f,f")))] + "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + && !reload_completed && !reload_in_progress + && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" + "#" + "&& 1" + [(const_int 0)] +{ + operands[2] = assign_386_stack_local (HImode, 1); + operands[3] = assign_386_stack_local (HImode, 2); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_truncdi_memory (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (DImode, 0); + emit_insn (gen_fix_truncdi_nomemory (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp")]) + +(define_insn "fix_truncdi_nomemory" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (fix:DI (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=m,m")) + (clobber (match_scratch:DF 5 "=&1f,&1f"))] + "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" + "#" + [(set_attr "type" "fistp")]) + +(define_insn "fix_truncdi_memory" + [(set (match_operand:DI 0 "memory_operand" "=m") + (fix:DI (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:DF 4 "=&1f"))] + "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" + "* operands[5] = operands[4]; return output_fix_trunc (insn, operands);" + [(set_attr "type" "fistp")]) -(define_insn "" - [(set (match_operand:DI 0 "nonimmediate_operand" "=m,!&r") - (fix:DI (fix:XF (match_operand:XF 1 "register_operand" 
"f,f")))) - (clobber (match_dup 1)) - (clobber (match_operand:HI 2 "memory_operand" "m,m")) - (clobber (match_operand:HI 3 "memory_operand" "m,m")) - (clobber (match_operand:DI 4 "memory_operand" "m,o")) - (clobber (match_scratch:HI 5 "=&r,&r"))] - "TARGET_80387" - "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fpop")]) - -;; Conversion between fixed point and floating point. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (fix:DI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))] + "") -;; ??? Possibly represent floatunssidf2 here in gcc2. +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (fix:DI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])] + "") -(define_expand "floatsisf2" - [(parallel [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:SI 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (SImode, 0);") +;; When SSE available, it is always faster to use it! 
+(define_insn "fix_truncsfdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI (match_operand:SF 1 "nonimmediate_operand" "xm")))] + "TARGET_64BIT && TARGET_SSE" + "cvttss2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) -(define_insn "" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,!r"))) - (clobber (match_operand:SI 2 "memory_operand" "m,m"))] - "TARGET_80387" - "#") +(define_insn "fix_truncdfdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] + "TARGET_64BIT && TARGET_SSE2" + "cvttsd2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:SI 1 "memory_operand" ""))) - (clobber (match_operand:SI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float:SF (match_dup 1)))] - "") +;; Signed conversion to SImode. 
-(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:SI 1 "register_operand" ""))) - (clobber (match_operand:SI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float:SF (match_dup 2)))] +(define_expand "fix_truncxfsi2" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:XF 1 "register_operand" "")))] + "!TARGET_64BIT && TARGET_80387" "") -(define_insn "" - [(set (match_operand:SF 0 "register_operand" "=f") - (float:SF (match_operand:SI 1 "memory_operand" "m")))] +(define_expand "fix_trunctfsi2" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:TF 1 "register_operand" "")))] "TARGET_80387" - "* return AS1 (fild%z1,%1);" - [(set_attr "type" "fpop")]) + "") -(define_expand "floathisf2" - [(parallel [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:HI 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (HImode, 0);") +(define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:DF 1 "register_operand" "")))] + "TARGET_80387 || TARGET_SSE2" +{ + if (TARGET_SSE2) + { + rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode); + emit_insn (gen_fix_truncdfsi_sse (out, operands[1])); + if (out != operands[0]) + emit_move_insn (operands[0], out); + DONE; + } +}) -(define_insn "" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:HI 1 "nonimmediate_operand" "m,!r"))) - (clobber (match_operand:HI 2 "memory_operand" "m,m"))] - "TARGET_80387" - "#") +(define_expand "fix_truncsfsi2" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (fix:SI (match_operand:SF 1 "register_operand" "")))] + "TARGET_80387 || TARGET_SSE" +{ + if (TARGET_SSE) + { + rtx out = REG_P (operands[0]) ? 
operands[0] : gen_reg_rtx (SImode); + emit_insn (gen_fix_truncsfsi_sse (out, operands[1])); + if (out != operands[0]) + emit_move_insn (operands[0], out); + DONE; + } +}) + +;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description +;; of the machinery. +(define_insn_and_split "*fix_truncsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r") + (fix:SI (match_operand 1 "register_operand" "f,f")))] + "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + && !reload_completed && !reload_in_progress + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "#" + "&& 1" + [(const_int 0)] +{ + operands[2] = assign_386_stack_local (HImode, 1); + operands[3] = assign_386_stack_local (HImode, 2); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_truncsi_memory (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (SImode, 0); + emit_insn (gen_fix_truncsi_nomemory (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp")]) + +(define_insn "fix_truncsi_nomemory" + [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r") + (fix:SI (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:SI 4 "memory_operand" "=m,m"))] + "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "#" + [(set_attr "type" "fistp")]) + +(define_insn "fix_truncsi_memory" + [(set (match_operand:SI 0 "memory_operand" "=m") + (fix:SI (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "* return output_fix_trunc (insn, operands);" + [(set_attr "type" "fistp")]) -(define_split - [(set 
(match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:HI 1 "memory_operand" ""))) - (clobber (match_operand:HI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float:SF (match_dup 1)))] +;; When SSE available, it is always faster to use it! +(define_insn "fix_truncsfsi_sse" + [(set (match_operand:SI 0 "register_operand" "=r") + (fix:SI (match_operand:SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "cvttss2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "fix_truncdfsi_sse" + [(set (match_operand:SI 0 "register_operand" "=r") + (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] + "TARGET_SSE2" + "cvttsd2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (fix:SI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:SI 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:SI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] "") -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:HI 1 "register_operand" ""))) - (clobber (match_operand:HI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float:SF (match_dup 2)))] +(define_split + [(set (match_operand:SI 0 "memory_operand" "") + (fix:SI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:SI 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:SI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))])] "") -(define_insn "" - [(set (match_operand:SF 0 "register_operand" "=f") - (float:SF (match_operand:HI 1 
"memory_operand" "m")))] - "TARGET_80387" - "* return AS1 (fild%z1,%1);" - [(set_attr "type" "fpop")]) +;; Signed conversion to HImode. -(define_expand "floatdisf2" - [(parallel [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:DI 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (DImode, 0);") +(define_expand "fix_truncxfhi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (fix:HI (match_operand:XF 1 "register_operand" "")))] + "!TARGET_64BIT && TARGET_80387" + "") -(define_insn "" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,!r"))) - (clobber (match_operand:DI 2 "memory_operand" "m,o"))] +(define_expand "fix_trunctfhi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (fix:HI (match_operand:TF 1 "register_operand" "")))] "TARGET_80387" - "#") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:DI 1 "memory_operand" ""))) - (clobber (match_operand:DI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float:SF (match_dup 1)))] "") -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float:SF (match_operand:DI 1 "register_operand" ""))) - (clobber (match_operand:DI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float:SF (match_dup 2)))] +(define_expand "fix_truncdfhi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (fix:HI (match_operand:DF 1 "register_operand" "")))] + "TARGET_80387 && !TARGET_SSE2" "") -(define_insn "" - [(set (match_operand:SF 0 "register_operand" "=f") - (float:SF (match_operand:DI 1 "memory_operand" "m")))] - "TARGET_80387" - "* return AS1 (fild%z1,%1);" - [(set_attr "type" "fpop")]) - -(define_expand "floatsidf2" - [(parallel [(set (match_operand:DF 0 "register_operand" "") - (float:DF 
(match_operand:SI 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (SImode, 0);") +(define_expand "fix_truncsfhi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (fix:HI (match_operand:SF 1 "register_operand" "")))] + "TARGET_80387 && !TARGET_SSE" + "") -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,!r"))) - (clobber (match_operand:SI 2 "memory_operand" "m,m"))] - "TARGET_80387" - "#") +;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description +;; of the machinery. +(define_insn_and_split "*fix_trunchi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r") + (fix:HI (match_operand 1 "register_operand" "f,f")))] + "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + && !reload_completed && !reload_in_progress + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "#" + "" + [(const_int 0)] +{ + operands[2] = assign_386_stack_local (HImode, 1); + operands[3] = assign_386_stack_local (HImode, 2); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunchi_memory (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (HImode, 0); + emit_insn (gen_fix_trunchi_nomemory (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp")]) + +(define_insn "fix_trunchi_nomemory" + [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r") + (fix:HI (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:HI 4 "memory_operand" "=m,m"))] + "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "#" + [(set_attr "type" "fistp")]) + +(define_insn "fix_trunchi_memory" + [(set (match_operand:HI 0 "memory_operand" 
"=m") + (fix:HI (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "* return output_fix_trunc (insn, operands);" + [(set_attr "type" "fistp")]) -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:SI 1 "memory_operand" ""))) - (clobber (match_operand:SI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float:DF (match_dup 1)))] +(define_split + [(set (match_operand:HI 0 "memory_operand" "") + (fix:HI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:HI 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:HI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))])] "") -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:SI 1 "register_operand" ""))) - (clobber (match_operand:SI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float:DF (match_dup 2)))] +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (fix:HI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:HI 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:HI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 4))]) + (set (match_dup 0) (match_dup 4))] "") -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f") - (float:DF (match_operand:SI 1 "memory_operand" "m")))] +;; %% Not used yet. 
+(define_insn "x86_fnstcw_1" + [(set (match_operand:HI 0 "memory_operand" "=m") + (unspec:HI [(reg:HI 18)] 11))] "TARGET_80387" - "* return AS1 (fild%z1,%1);" - [(set_attr "type" "fpop")]) - -(define_expand "floathidf2" - [(parallel [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:HI 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2))])] + "fnstcw\t%0" + [(set_attr "length" "2") + (set_attr "mode" "HI") + (set_attr "i387" "1") + (set_attr "ppro_uops" "few")]) + +(define_insn "x86_fldcw_1" + [(set (reg:HI 18) + (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] 12))] "TARGET_80387" - "operands[2] = assign_386_stack_local (HImode, 0);") + "fldcw\t%0" + [(set_attr "length" "2") + (set_attr "mode" "HI") + (set_attr "i387" "1") + (set_attr "athlon_decode" "vector") + (set_attr "ppro_uops" "few")]) + +;; Conversion between fixed point and floating point. -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,!r"))) - (clobber (match_operand:HI 2 "memory_operand" "m,m"))] - "TARGET_80387" - "#") +;; Even though we only accept memory inputs, the backend _really_ +;; wants to be able to do this between registers. 
-(define_split - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:HI 1 "memory_operand" ""))) - (clobber (match_operand:HI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float:DF (match_dup 1)))] - "") +(define_insn "floathisf2" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float:SF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] + "TARGET_80387 && !TARGET_SSE" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "SF") + (set_attr "fp_int_src" "true")]) -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:HI 1 "register_operand" ""))) - (clobber (match_operand:HI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float:DF (match_dup 2)))] +(define_expand "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_SSE || TARGET_80387" "") -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f") - (float:DF (match_operand:HI 1 "memory_operand" "m")))] - "TARGET_80387" - "* return AS1 (fild%z1,%1);" - [(set_attr "type" "fpop")]) +(define_insn "*floatsisf2_i387" + [(set (match_operand:SF 0 "register_operand" "=f,?f,x") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))] + "TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)" + "@ + fild%z1\t%1 + # + cvtsi2ss\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,multi,sse") + (set_attr "mode" "SF") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsisf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "mr")))] + "TARGET_SSE" + "cvtsi2ss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF") + (set_attr "fp_int_src" "true")]) -(define_expand "floatdidf2" - [(parallel [(set (match_operand:DF 0 "register_operand" "") - 
(float:DF (match_operand:DI 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (DImode, 0);") +(define_expand "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] + "(TARGET_64BIT && TARGET_SSE) || TARGET_80387" + "") -(define_insn "" +(define_insn "*floatdisf2_i387_only" + [(set (match_operand:SF 0 "register_operand" "=f,?f") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] + "TARGET_80387 && (!TARGET_SSE || !TARGET_64BIT || TARGET_MIX_SSE_I387)" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "SF") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatdisf2_i387" + [(set (match_operand:SF 0 "register_operand" "=f,?f,x") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))] + "TARGET_64BIT && TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)" + "@ + fild%z1\t%1 + # + cvtsi2ss{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,multi,sse") + (set_attr "mode" "SF") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatdisf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "mr")))] + "TARGET_64BIT && TARGET_SSE" + "cvtsi2ss{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF") + (set_attr "fp_int_src" "true")]) + +(define_insn "floathidf2" [(set (match_operand:DF 0 "register_operand" "=f,f") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,!r"))) - (clobber (match_operand:DI 2 "memory_operand" "m,o"))] - "TARGET_80387" - "#") + (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] + "TARGET_80387 && !TARGET_SSE2" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "DF") + (set_attr "fp_int_src" "true")]) -(define_split +(define_expand "floatsidf2" [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:DI 1 "memory_operand" 
""))) - (clobber (match_operand:DI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float:DF (match_dup 1)))] + (float:DF (match_operand:SI 1 "nonimmediate_operand" "")))] + "" "") -(define_split +(define_insn "*floatsidf2_i387" + [(set (match_operand:DF 0 "register_operand" "=f,?f,Y") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))] + "TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)" + "@ + fild%z1\t%1 + # + cvtsi2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,multi,sse") + (set_attr "mode" "DF") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsidf2_sse" + [(set (match_operand:DF 0 "register_operand" "=Y") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "mr")))] + "TARGET_SSE2" + "cvtsi2sd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "DF") + (set_attr "fp_int_src" "true")]) + +(define_expand "floatdidf2" [(set (match_operand:DF 0 "register_operand" "") - (float:DF (match_operand:DI 1 "register_operand" ""))) - (clobber (match_operand:DI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float:DF (match_dup 2)))] + (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))] + "(TARGET_64BIT && TARGET_SSE2) || TARGET_80387" "") -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f") - (float:DF (match_operand:DI 1 "memory_operand" "m")))] +(define_insn "*floatdidf2_i387_only" + [(set (match_operand:DF 0 "register_operand" "=f,?f") + (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] + "TARGET_80387 && (!TARGET_SSE2 || !TARGET_64BIT)" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "DF") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatdidf2_i387" + [(set (match_operand:DF 0 "register_operand" "=f,?f,Y") + (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))] + "TARGET_64BIT && TARGET_80387 && (!TARGET_SSE2 || 
TARGET_MIX_SSE_I387)" + "@ + fild%z1\t%1 + # + cvtsi2sd{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "fmov,multi,sse") + (set_attr "mode" "DF") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatdidf2_sse" + [(set (match_operand:DF 0 "register_operand" "=Y") + (float:DF (match_operand:DI 1 "nonimmediate_operand" "mr")))] + "TARGET_SSE2" + "cvtsi2sd{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "DF") + (set_attr "fp_int_src" "true")]) + +(define_insn "floathixf2" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] + "!TARGET_64BIT && TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "XF") + (set_attr "fp_int_src" "true")]) + +(define_insn "floathitf2" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (float:TF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] "TARGET_80387" - "* return AS1 (fild%z1,%1);" - [(set_attr "type" "fpop")]) + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "XF") + (set_attr "fp_int_src" "true")]) -(define_expand "floatsixf2" - [(parallel [(set (match_operand:XF 0 "register_operand" "") - (float:XF (match_operand:SI 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2))])] +(define_insn "floatsixf2" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,r")))] + "!TARGET_64BIT && TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "XF") + (set_attr "fp_int_src" "true")]) + +(define_insn "floatsitf2" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (float:TF (match_operand:SI 1 "nonimmediate_operand" "m,r")))] "TARGET_80387" - "operands[2] = assign_386_stack_local (SImode, 0);") + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "XF") + (set_attr "fp_int_src" "true")]) -(define_insn "" +(define_insn "floatdixf2" [(set (match_operand:XF 0 
"register_operand" "=f,f") - (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,!r"))) - (clobber (match_operand:SI 2 "memory_operand" "m,m"))] + (float:XF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] + "!TARGET_64BIT && TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "XF") + (set_attr "fp_int_src" "true")]) + +(define_insn "floatditf2" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (float:TF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] "TARGET_80387" - "#") - -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (float:XF (match_operand:SI 1 "memory_operand" ""))) - (clobber (match_operand:SI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float:XF (match_dup 1)))] - "") + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "XF") + (set_attr "fp_int_src" "true")]) +;; %%% Kill these when reload knows how to do it. (define_split - [(set (match_operand:XF 0 "register_operand" "") - (float:XF (match_operand:SI 1 "register_operand" ""))) - (clobber (match_operand:SI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float:XF (match_dup 2)))] - "") - -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f") - (float:XF (match_operand:SI 1 "memory_operand" "m")))] - "TARGET_80387" - "* return AS1 (fild%z1,%1);" - [(set_attr "type" "fpop")]) + [(set (match_operand 0 "register_operand" "") + (float (match_operand 1 "register_operand" "")))] + "reload_completed && FLOAT_MODE_P (GET_MODE (operands[0])) + && FP_REG_P (operands[0])" + [(const_int 0)] +{ + operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); + operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2])); + ix86_free_from_memory (GET_MODE (operands[1])); + DONE; +}) + +;; Add instructions 
-(define_expand "floathixf2" - [(parallel [(set (match_operand:XF 0 "register_operand" "") - (float:XF (match_operand:HI 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (HImode, 0);") +;; %%% splits for addsidi3 +; [(set (match_operand:DI 0 "nonimmediate_operand" "") +; (plus:DI (match_operand:DI 1 "general_operand" "") +; (zero_extend:DI (match_operand:SI 2 "general_operand" ""))))] -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,!r"))) - (clobber (match_operand:HI 2 "memory_operand" "m,m"))] - "TARGET_80387" +(define_expand "adddi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC 17))] + "" + "ix86_expand_binary_operator (PLUS, DImode, operands); DONE;") + +(define_insn "*adddi3_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "general_operand" "roiF,riF"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" "#") (define_split - [(set (match_operand:XF 0 "register_operand" "") - (float:XF (match_operand:HI 1 "memory_operand" ""))) - (clobber (match_operand:HI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float:XF (match_dup 1)))] - "") + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC 17) (unspec:CC [(match_dup 1) (match_dup 2)] 12)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (match_dup 4)) + (match_dup 5))) + (clobber (reg:CC 17))])] + "split_di 
(operands+0, 1, operands+0, operands+3); + split_di (operands+1, 1, operands+1, operands+4); + split_di (operands+2, 1, operands+2, operands+5);") + +(define_insn "*adddi3_carry_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (plus:DI (plus:DI (ltu:DI (reg:CC 17) (const_int 0)) + (match_operand:DI 1 "nonimmediate_operand" "%0,0")) + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" + "adc{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "DI") + (set_attr "ppro_uops" "few")]) + +(define_insn "*adddi3_cc_rex64" + [(set (reg:CC 17) (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")] 12)) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" + "add{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*addsi3_carry" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (match_operand:SI 1 "nonimmediate_operand" "%0,0")) + (match_operand:SI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (PLUS, SImode, operands)" + "adc{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI") + (set_attr "ppro_uops" "few")]) + +(define_insn "*addsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (match_operand:SI 1 "nonimmediate_operand" "%0")) + (match_operand:SI 2 "general_operand" "rim")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" + "adc{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") 
+ (set_attr "mode" "SI") + (set_attr "ppro_uops" "few")]) + +(define_insn "*addsi3_cc" + [(set (reg:CC 17) (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,rm")] 12)) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, SImode, operands)" + "add{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "addqi3_cc" + [(set (reg:CC 17) (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qi,qm")] 12)) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (plus:QI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, QImode, operands)" + "add{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (float:XF (match_operand:HI 1 "register_operand" ""))) - (clobber (match_operand:HI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float:XF (match_dup 2)))] - "") +(define_expand "addsi3" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC 17))])] + "" + "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;") -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f") - (float:XF (match_operand:HI 1 "memory_operand" "m")))] - "TARGET_80387" - "* return AS1 (fild%z1,%1);" - [(set_attr "type" "fpop")]) +(define_insn "*lea_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "address_operand" "p"))] + "!TARGET_64BIT" + "lea{l}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) -(define_expand "floatdixf2" - [(parallel [(set (match_operand:XF 0 "register_operand" "") - (float:XF 
(match_operand:DI 1 "nonimmediate_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (DImode, 0);") +(define_insn "*lea_1_rex64" + [(set (match_operand:SI 0 "register_operand" "=r") + (subreg:SI (match_operand:DI 1 "address_operand" "p") 0))] + "TARGET_64BIT" + "lea{l}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*lea_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (subreg:SI (match_operand:DI 1 "address_operand" "p") 0)))] + "TARGET_64BIT" + "lea{l}\t{%a1, %k0|%k0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*lea_2_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "address_operand" "p"))] + "TARGET_64BIT" + "lea{q}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "DI")]) + +;; The lea patterns for non-Pmodes needs to be matched by several +;; insns converted to real lea by splitters. 
+ +(define_insn_and_split "*lea_general_1" + [(set (match_operand 0 "register_operand" "=r") + (plus (plus (match_operand 1 "register_operand" "r") + (match_operand 2 "register_operand" "r")) + (match_operand 3 "immediate_operand" "i")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && GET_MODE (operands[0]) == GET_MODE (operands[2]) + && (GET_MODE (operands[0]) == GET_MODE (operands[3]) + || GET_MODE (operands[3]) == VOIDmode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + operands[3] = gen_lowpart (Pmode, operands[3]); + pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]), + operands[3]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r")) + (match_operand:SI 3 "immediate_operand" "i"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + operands[3] = gen_lowpart (Pmode, operands[3]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_2" + [(set (match_operand 0 "register_operand" "=r") + (plus (mult (match_operand 1 "register_operand" "r") + (match_operand 2 
"const248_operand" "i")) + (match_operand 3 "nonmemory_operand" "ri")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && (GET_MODE (operands[0]) == GET_MODE (operands[3]) + || GET_MODE (operands[3]) == VOIDmode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]), + operands[3]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_2_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const248_operand" "n")) + (match_operand:SI 3 "nonmemory_operand" "ri"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_3" + [(set (match_operand 0 "register_operand" "=r") + (plus (plus (mult (match_operand 1 "register_operand" "r") + (match_operand 2 "const248_operand" "i")) + (match_operand 3 "register_operand" "r")) + (match_operand 4 "immediate_operand" "i")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_size) + && GET_MODE 
(operands[0]) == GET_MODE (operands[1]) + && GET_MODE (operands[0]) == GET_MODE (operands[3])" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + operands[4] = gen_lowpart (Pmode, operands[4]); + pat = gen_rtx_PLUS (Pmode, + gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], + operands[2]), + operands[3]), + operands[4]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_3_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "const248_operand" "n")) + (match_operand:SI 3 "register_operand" "r")) + (match_operand:SI 4 "immediate_operand" "i"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (match_dup 4)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + operands[4] = gen_lowpart (Pmode, operands[4]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*adddi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r") + (match_operand:DI 2 "x86_64_general_operand" "rme,re,re"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{q}\t{%a2, %0|%0, %a2}"; -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (float:XF (match_operand:DI 1 
"nonimmediate_operand" "m,!r"))) - (clobber (match_operand:DI 2 "memory_operand" "m,o"))] - "TARGET_80387" - "#") + case TYPE_INCDEC: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else if (operands[2] == constm1_rtx) + return "dec{q}\t%0"; + else + abort (); -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (float:XF (match_operand:DI 1 "memory_operand" ""))) - (clobber (match_operand:DI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (float:XF (match_dup 1)))] - "") + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + ; Current assemblers are broken and do not allow @GOTOFF in + ; ought but a memory context. + (match_operand:DI 2 "pic_symbolic_operand" "") + (const_string "lea") + (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set_attr "mode" "DI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. 
(define_split - [(set (match_operand:XF 0 "register_operand" "") - (float:XF (match_operand:DI 1 "register_operand" ""))) - (clobber (match_operand:DI 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) - (match_dup 1)) - (set (match_dup 0) - (float:XF (match_dup 2)))] + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "x86_64_nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (plus:DI (match_dup 1) + (match_dup 2)))] "") -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f") - (float:XF (match_operand:DI 1 "memory_operand" "m")))] - "TARGET_80387" - "* return AS1 (fild%z1,%1);" - [(set_attr "type" "fpop")]) - -;;- add instructions - -(define_insn "*addsidi3_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,!&r,!r,o,!o") - (plus:DI (match_operand:DI 1 "general_operand" "0,0,0,o,riF,riF,o") - (zero_extend:DI (match_operand:SI 2 "general_operand" "o,ri,ri,roi,roi,ri,ri")))) - (clobber (match_scratch:SI 3 "=X,X,X,X,X,X,&r"))] - "" - "* +(define_insn "*adddi_2_rex64" + [(set (reg 17) + (compare + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "rme,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, DImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" { - rtx low[3], high[3], xops[7]; - - CC_STATUS_INIT; + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (! 
rtx_equal_p (operands[0], operands[1])) + abort (); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else if (operands[2] == constm1_rtx) + return "dec{q}\t%0"; + else + abort (); - split_di (operands, 2, low, high); - high[2] = const0_rtx; - low[2] = operands[2]; + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* ???? We ought to handle there the 32bit case too + - do we need new constrant? */ + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "DI")]) + +(define_insn "*adddi_3_rex64" + [(set (reg 17) + (compare (neg:DI (match_operand:DI 2 "x86_64_general_operand" "rme")) + (match_operand:DI 1 "x86_64_general_operand" "%0"))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCZmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else if (operands[2] == constm1_rtx) + return "dec{q}\t%0"; + else + abort (); - if (!rtx_equal_p (operands[0], operands[1])) + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* ???? 
We ought to handle there the 32bit case too + - do we need new constrant? */ + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "DI")]) + +; For comparisons against 1, -1 and 128, we may generate better code +; by converting cmp to add, inc or dec as done by peephole2. This pattern +; is matched then. We can't accept general immediate, because for +; case of overflows, the result is messed up. +; This pattern also don't hold of 0x8000000000000000, since the value overflows +; when negated. +; Also carry flag is reversed compared to cmp, so this conversion is valid +; only for comparisons not depending on it. +(define_insn "*adddi_4_rex64" + [(set (reg 17) + (compare (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:DI 2 "x86_64_immediate_operand" "e"))) + (clobber (match_scratch:DI 0 "=rm"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCGCmode)" +{ + switch (get_attr_type (insn)) { - xops[0] = high[0]; - xops[1] = low[0]; - xops[2] = high[1]; - xops[3] = low[1]; + case TYPE_INCDEC: + if (operands[2] == constm1_rtx) + return "inc{q}\t%0"; + else if (operands[2] == const1_rtx) + return "dec{q}\t%0"; + else + abort(); - if (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - { - output_asm_insn (AS2 (mov%L1,%3,%1), xops); - output_asm_insn (AS2 (mov%L0,%2,%0), xops); - } + default: + if (! 
rtx_equal_p (operands[0], operands[1])) + abort (); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if ((INTVAL (operands[2]) == -128 + || (INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) != 128)) + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))) + return "sub{q}\t{%2, %0|%0, %2}"; + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "DI")]) + +(define_insn "*adddi_5_rex64" + [(set (reg 17) + (compare + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0") + (match_operand:DI 2 "x86_64_general_operand" "rme")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCGOCmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else if (operands[2] == constm1_rtx) + return "dec{q}\t%0"; else - { - xops[4] = high[2]; - xops[5] = low[2]; - xops[6] = operands[3]; - output_asm_insn (AS2 (mov%L6,%3,%6), xops); - output_asm_insn (AS2 (add%L6,%5,%6), xops); - output_asm_insn (AS2 (mov%L1,%6,%1), xops); - output_asm_insn (AS2 (mov%L6,%2,%6), xops); - output_asm_insn (AS2 (adc%L6,%4,%6), xops); - output_asm_insn (AS2 (mov%L0,%6,%0), xops); - RET; - } + abort(); + + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. 
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "DI")]) - output_asm_insn (AS2 (add%L0,%2,%0), low); - output_asm_insn (AS2 (adc%L0,%2,%0), high); - cc_status.value1 = high[0]; - cc_status.flags = CC_NO_OVERFLOW; - RET; -}" - [(set_attr "type" "binary")]) -(define_insn "addsidi3_2" - [(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,&r,!&r,&r,o,o,!o") - (plus:DI (zero_extend:DI (match_operand:SI 2 "general_operand" "o,ri,ri,o,o,ri,ri,i,r")) - (match_operand:DI 1 "general_operand" "0,0,0,iF,ro,roiF,riF,o,o"))) - (clobber (match_scratch:SI 3 "=X,X,X,X,X,X,X,&r,&r"))] - "" - "* +(define_insn "*addsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r") + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r") + (match_operand:SI 2 "general_operand" "rmni,rni,rni"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (PLUS, SImode, operands)" { - rtx low[3], high[3], xops[7]; + switch (get_attr_type (insn)) + { + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{l}\t{%a2, %0|%0, %a2}"; - CC_STATUS_INIT; + case TYPE_INCDEC: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else if (operands[2] == constm1_rtx) + return "dec{l}\t%0"; + else + abort(); - split_di (operands, 2, low, high); - high[2] = const0_rtx; - low[2] = operands[2]; + default: + if (! 
rtx_equal_p (operands[0], operands[1])) + abort (); - if (!rtx_equal_p (operands[0], operands[1])) + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + ; Current assemblers are broken and do not allow @GOTOFF in + ; ought but a memory context. + (match_operand:SI 2 "pic_symbolic_operand" "") + (const_string "lea") + (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand 0 "register_operand" "") + (plus (match_operand 1 "register_operand" "") + (match_operand 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + rtx pat; + /* In -fPIC mode the constructs like (const (unspec [symbol_ref])) + may confuse gen_lowpart. */ + if (GET_MODE (operands[0]) != Pmode) + { + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + } + operands[0] = gen_lowpart (SImode, operands[0]); + pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +}) + +;; It may seem that nonimmediate operand is proper one for operand 1. 
+;; The addsi_1 pattern allows nonimmediate operand at that place and +;; we take care in ix86_binary_operator_ok to not allow two memory +;; operands so proper swapping will be done in reload. This allow +;; patterns constructed from addsi_1 to match. +(define_insn "addsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") + (match_operand:SI 2 "general_operand" "rmni,rni")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" +{ /* Output code for an SImode PLUS whose result is zero-extended into a DImode register (TARGET_64BIT only). The instruction form is selected by the insn's `type' attribute, which is computed at the end of this pattern. Every template uses the {l} suffix and names the destination as %k0. NOTE(review): %k presumably selects the 32-bit register name -- confirm in print_operand. */ + switch (get_attr_type (insn)) { - xops[0] = high[0]; - xops[1] = low[0]; - xops[2] = high[1]; - xops[3] = low[1]; + case TYPE_LEA: /* Type lea: alternative 1 (destination not tied to operand 1) or an addend matching pic_symbolic_operand. operands[2] is overwritten with the SET_SRC of element 0 of the insn pattern and printed with the %a modifier. */ + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{l}\t{%a2, %k0|%k0, %a2}"; + + case TYPE_INCDEC: /* Type incdec: operand 2 matched incdec_operand. Only const1_rtx (inc) and constm1_rtx (dec) are handled here; anything else aborts. */ + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else if (operands[2] == constm1_rtx) + return "dec{l}\t%k0"; + else + abort(); - if (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - { - if (rtx_equal_p (low[0], operands[2])) - { - output_asm_insn (AS2 (mov%L0,%2,%0), high); - output_asm_insn (AS2 (add%L0,%1,%0), low); - output_asm_insn (AS2 (adc%L0,%1,%0), high); - RET; - } - if (rtx_equal_p (high[0], operands[2])) - { - if (GET_CODE (operands[0]) != MEM) - { - output_asm_insn (AS2 (mov%L0,%2,%0), low); - output_asm_insn (AS2 (mov%L0,%2,%0), high); - output_asm_insn (AS2 (add%L0,%1,%0), low); - output_asm_insn (AS2 (adc%L0,%1,%0), high); - } - else - { - /* It's too late to ask for a scratch now - but this - will probably not happen too often. 
*/ - output_asm_insn (AS2 (add%L1,%2,%1), low); - output_asm_insn (AS2 (mov%L0,%1,%0), low); - output_asm_insn (AS2 (mov%L1,%2,%1), low); - output_asm_insn (AS2 (mov%L0,%2,%0), high); - output_asm_insn (AS2 (adc%L0,%1,%0), high); - output_asm_insn (AS2 (sub%L1,%0,%1), low); - output_asm_insn (AS1 (neg%L1,%1), low); - } - RET; - } - output_asm_insn (AS2 (mov%L1,%3,%1), xops); - output_asm_insn (AS2 (mov%L0,%2,%0), xops); - } + default: + /* Remaining case (type alu). Make things pretty and emit `subl $4,%eax' rather than + `addl $-4, %eax': a negative CONST_INT addend is negated and output with sub. + Exceptions: -128 encodes smaller than 128, so swap sign and op there: an addend + of 128 is output as `subl $-128' and an addend of -128 stays an add. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %k0|%k0, %2}"; + } + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + ; Current assemblers are broken and do not allow @GOTOFF in + ; ought but a memory context. + (match_operand:SI 2 "pic_symbolic_operand" "") + (const_string "lea") + (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. 
+(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))) + (clobber (reg:CC 17))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); +}) + +(define_insn "*addsi_2" + [(set (reg 17) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "rmni,rni")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (plus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, SImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else if (operands[2] == constm1_rtx) + return "dec{l}\t%0"; else - { - xops[4] = high[2]; - xops[5] = low[2]; - xops[6] = operands[3]; - output_asm_insn (AS2 (mov%L6,%3,%6), xops); - output_asm_insn (AS2 (add%L6,%5,%6), xops); - output_asm_insn (AS2 (mov%L1,%6,%1), xops); - output_asm_insn (AS2 (mov%L6,%2,%6), xops); - output_asm_insn (AS2 (adc%L6,%4,%6), xops); - output_asm_insn (AS2 (mov%L0,%6,%0), xops); - RET; - } + abort(); + + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*addsi_2_zext" + [(set (reg 17) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rmni")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, SImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else if (operands[2] == constm1_rtx) + return "dec{l}\t%k0"; + else + abort(); - output_asm_insn (AS2 (add%L0,%2,%0), low); - output_asm_insn (AS2 (adc%L0,%2,%0), high); - cc_status.value1 = high[0]; - cc_status.flags = CC_NO_OVERFLOW; - RET; -}" - [(set_attr "type" "binary")]) + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %k0|%k0, %2}"; + } + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +(define_insn "*addsi_3" + [(set (reg 17) + (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni")) + (match_operand:SI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCZmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else if (operands[2] == constm1_rtx) + return "dec{l}\t%0"; + else + abort(); -(define_insn "adddi3" - [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o") - (plus:DI (match_operand:DI 1 "general_operand" "%0,0,0,0iF,or,riF,o") - (match_operand:DI 2 "general_operand" "o,riF,0,or,or,oriF,o"))) - (clobber (match_scratch:SI 3 "=X,X,X,&r,X,&r,&r"))] - "" - "* + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*addsi_3_zext" + [(set (reg 17) + (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni")) + (match_operand:SI 1 "nonimmediate_operand" "%0"))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode) + && ix86_binary_operator_ok (PLUS, SImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" { - rtx low[3], high[3], xops[7], temp; + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else if (operands[2] == constm1_rtx) + return "dec{l}\t%k0"; + else + abort(); - CC_STATUS_INIT; + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %k0|%k0, %2}"; + } + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +; For comparisons against 1, -1 and 128, we may generate better code +; by converting cmp to add, inc or dec as done by peephole2. This pattern +; is matched then. We can't accept a general immediate, because in +; case of overflow the result is messed up. +; This pattern also doesn't hold for 0x80000000, since the value overflows +; when negated. +; Also the carry flag is reversed compared to cmp, so this conversion is valid +; only for comparisons not depending on it. +(define_insn "*addsi_4" + [(set (reg 17) + (compare (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "const_int_operand" "n"))) + (clobber (match_scratch:SI 0 "=rm"))] + "ix86_match_ccmode (insn, CCGCmode) + && (INTVAL (operands[2]) & 0xffffffff) != 0x80000000" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == constm1_rtx) + return "inc{l}\t%0"; + else if (operands[2] == const1_rtx) + return "dec{l}\t%0"; + else + abort(); - if (rtx_equal_p (operands[0], operands[2])) + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if ((INTVAL (operands[2]) == -128 + || (INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) != 128))) + return "sub{l}\t{%2, %0|%0, %2}"; + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +(define_insn "*addsi_5" + [(set (reg 17) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rmni")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) { - temp = operands[1]; - operands[1] = operands[2]; - operands[2] = temp; + case TYPE_INCDEC: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else if (operands[2] == constm1_rtx) + return "dec{l}\t%0"; + else + abort(); + + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) - split_di (operands, 3, low, high); - if (!rtx_equal_p (operands[0], operands[1])) +(define_expand "addhi3" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (PLUS, HImode, operands); DONE;") + +;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable incb %ah +;; type optimizations enabled by define-splits. This is not important +;; for PII, and in fact harmful because of partial register stalls. + +(define_insn "*addhi_1_lea" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r") + (match_operand:HI 2 "general_operand" "ri,rm,rni"))) + (clobber (reg:CC 17))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) { - xops[0] = high[0]; - xops[1] = low[0]; - xops[2] = high[1]; - xops[3] = low[1]; + case TYPE_LEA: + return "#"; + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 65535)) + return "dec{w}\t%0"; + abort(); - if (GET_CODE (operands[0]) != MEM) + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) { - output_asm_insn (AS2 (mov%L1,%3,%1), xops); - output_asm_insn (AS2 (mov%L0,%2,%0), xops); + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; } - else + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (eq_attr "alternative" "2") + (const_string "lea") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu")))) + (set_attr "mode" "HI,HI,SI")]) + +(define_insn "*addhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC 17))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 65535)) + return "dec{w}\t%0"; + abort(); + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) { - xops[4] = high[2]; - xops[5] = low[2]; - xops[6] = operands[3]; - output_asm_insn (AS2 (mov%L6,%3,%6), xops); - output_asm_insn (AS2 (add%L6,%5,%6), xops); - output_asm_insn (AS2 (mov%L1,%6,%1), xops); - output_asm_insn (AS2 (mov%L6,%2,%6), xops); - output_asm_insn (AS2 (adc%L6,%4,%6), xops); - output_asm_insn (AS2 (mov%L0,%6,%0), xops); - RET; + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; } + return "add{w}\t{%2, %0|%0, %2}"; } - - cc_status.value1 = high[0]; - cc_status.flags = CC_NO_OVERFLOW; - - if (GET_CODE (operands[3]) == REG && GET_CODE (operands[2]) != REG) +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +(define_insn "*addhi_2" + [(set (reg 17) + (compare + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmni,rni")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (plus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) { - xops[0] = high[0]; - xops[1] = low[0]; - xops[2] = high[2]; - xops[3] = low[2]; - xops[4] = operands[3]; + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 65535)) + return "dec{w}\t%0"; + abort(); - output_asm_insn (AS2 (mov%L4,%3,%4), xops); - output_asm_insn (AS2 (add%L1,%4,%1), xops); - output_asm_insn (AS2 (mov%L4,%2,%4), xops); - output_asm_insn (AS2 (adc%L0,%4,%0), xops); + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. 
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; } - - else if (GET_CODE (low[2]) != CONST_INT || INTVAL (low[2]) != 0) +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +(define_insn "*addhi_3" + [(set (reg 17) + (compare (neg:HI (match_operand:HI 2 "general_operand" "rmni")) + (match_operand:HI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCZmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" +{ + switch (get_attr_type (insn)) { - output_asm_insn (AS2 (add%L0,%2,%0), low); - output_asm_insn (AS2 (adc%L0,%2,%0), high); - } - - else - output_asm_insn (AS2 (add%L0,%2,%0), high); + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 65535)) + return "dec{w}\t%0"; + abort(); - RET; -}" - [(set_attr "type" "binary")]) - -;; On a 486, it is faster to do movl/addl than to do a single leal if -;; operands[1] and operands[2] are both registers. - -(define_expand "addsi3" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "general_operand" "")))] - "" - "IX86_EXPAND_BINARY_OPERATOR (PLUS, SImode, operands);") + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +; See comments above *addsi_4 for details. +(define_insn "*addhi_4" + [(set (reg 17) + (compare (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:HI 2 "const_int_operand" "n"))) + (clobber (match_scratch:HI 0 "=rm"))] + "ix86_match_ccmode (insn, CCGCmode) + && (INTVAL (operands[2]) & 0xffff) != 0x8000" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 65535)) + return "inc{w}\t%0"; + else if (operands[2] == const1_rtx) + return "dec{w}\t%0"; + else + abort(); -(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r") - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:SI 2 "general_operand" "rmi,ri,ri")))] - "ix86_binary_operator_ok (PLUS, SImode, operands)" - "* + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if ((INTVAL (operands[2]) == -128 + || (INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) != 128))) + return "sub{w}\t{%2, %0|%0, %2}"; + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + + +(define_insn "*addhi_5" + [(set (reg 17) + (compare + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rmni")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" { - if (REG_P (operands[0]) && REG_P (operands[1]) - && (REG_P (operands[2]) || CONSTANT_P (operands[2])) - && REGNO (operands[0]) != REGNO (operands[1])) + switch (get_attr_type (insn)) { - if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) - return AS2 (add%L0,%1,%0); + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 65535)) + return "dec{w}\t%0"; + abort(); - if (operands[2] == stack_pointer_rtx) + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) { - rtx temp; - - temp = operands[1]; - operands[1] = operands[2]; - operands[2] = temp; + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) - if (operands[2] != stack_pointer_rtx) +(define_expand "addqi3" + [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (PLUS, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*addqi_1_lea" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r") + (match_operand:QI 2 "general_operand" "qn,qmn,rn,rn"))) + (clobber (reg:CC 17))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + int widen = (which_alternative == 2); + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + abort(); + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) { - CC_STATUS_INIT; - operands[1] = SET_SRC (PATTERN (insn)); - return AS2 (lea%L0,%a1,%0); + operands[2] = GEN_INT (-INTVAL (operands[2])); + if (widen) + return "sub{l}\t{%2, %k0|%k0, %2}"; + else + return "sub{b}\t{%2, %0|%0, %2}"; } + if (widen) + return "add{l}\t{%k2, %k0|%k0, %k2}"; + else + return "add{b}\t{%2, %0|%0, %2}"; } - - if (!rtx_equal_p (operands[0], operands[1])) - output_asm_insn (AS2 (mov%L0,%1,%0), operands); - - if (operands[2] == const1_rtx) - return AS1 (inc%L0,%0); - - if (operands[2] == constm1_rtx) - return AS1 (dec%L0,%0); - - /* subl $-128,%ebx is smaller than addl $128,%ebx. */ - if (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 128) +} + [(set (attr "type") + (if_then_else (eq_attr "alternative" "3") + (const_string "lea") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu")))) + (set_attr "mode" "QI,QI,SI,SI")]) + +(define_insn "*addqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qn,qmn,rn"))) + (clobber (reg:CC 17))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + int widen = (which_alternative == 2); + switch (get_attr_type (insn)) { - /* This doesn't compute the carry bit in the same way - * as add%L0, but we use inc and dec above and they - * don't set the carry bit at all. If inc/dec don't need - * a CC_STATUS_INIT, this doesn't either... */ - operands[2] = GEN_INT (-128); - return AS2 (sub%L0,%2,%0); - } - - return AS2 (add%L0,%2,%0); -}" - [(set_attr "type" "binary")]) + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return widen ? 
"inc{l}\t%k0" : "inc{b}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + abort(); -;; addsi3 is faster, so put this after. - -(define_insn "movsi_lea" - [(set (match_operand:SI 0 "register_operand" "=r") - (match_operand:QI 1 "address_operand" "p"))] - "" - "* + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + if (widen) + return "sub{l}\t{%2, %k0|%k0, %2}"; + else + return "sub{b}\t{%2, %0|%0, %2}"; + } + if (widen) + return "add{l}\t{%k2, %k0|%k0, %k2}"; + else + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*addqi_2" + [(set (reg 17) + (compare + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qmni,qni")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (plus:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, QImode, operands)" { - /* Adding a constant to a register is faster with an add. */ - /* ??? can this ever happen? 
*/ - if (GET_CODE (operands[1]) == PLUS - && GET_CODE (XEXP (operands[1], 1)) == CONST_INT - && rtx_equal_p (operands[0], XEXP (operands[1], 0))) + switch (get_attr_type (insn)) { - operands[1] = XEXP (operands[1], 1); - - if (operands[1] == const1_rtx) - return AS1 (inc%L0,%0); + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return "dec{b}\t%0"; + abort(); - if (operands[1] == constm1_rtx) - return AS1 (dec%L0,%0); - - return AS2 (add%L0,%1,%0); + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%2, %0|%0, %2}"; + } + return "add{b}\t{%2, %0|%0, %2}"; } - - CC_STATUS_INIT; - return AS2 (lea%L0,%a1,%0); -}" - [(set_attr "type" "lea")]) - -;; ??? `lea' here, for three operand add? If leaw is used, only %bx, -;; %si and %di can appear in SET_SRC, and output_asm_insn might not be -;; able to handle the operand. But leal always works? 
- -(define_expand "addhi3" - [(set (match_operand:HI 0 "general_operand" "") - (plus:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "general_operand" "")))] - "" - "IX86_EXPAND_BINARY_OPERATOR (PLUS, HImode, operands);") - -(define_insn "" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,?r") - (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:HI 2 "general_operand" "ri,rm,ri")))] - "ix86_binary_operator_ok (PLUS, HImode, operands)" - "* +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_3" + [(set (reg 17) + (compare (neg:QI (match_operand:QI 2 "general_operand" "qmni")) + (match_operand:QI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCZmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" { - if (REG_P (operands[0]) && REG_P (operands[1]) - && (REG_P (operands[2]) || CONSTANT_P (operands[2])) - && REGNO (operands[0]) != REGNO (operands[1])) + switch (get_attr_type (insn)) { - if (operands[2] == stack_pointer_rtx) - abort (); + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return "dec{b}\t%0"; + abort(); - CC_STATUS_INIT; - operands[1] - = gen_rtx_PLUS (SImode, - gen_rtx_REG (SImode, REGNO (operands[1])), - (! REG_P (operands[2]) - ? operands[2] - : gen_rtx_REG (SImode, REGNO (operands[2])))); - operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); - return AS2 (lea%L0,%a1,%0); + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%2, %0|%0, %2}"; + } + return "add{b}\t{%2, %0|%0, %2}"; } - - /* ??? what about offsettable memory references? */ - if (!TARGET_PENTIUMPRO /* partial stalls are just too painful to risk. */ - && QI_REG_P (operands[0]) - && GET_CODE (operands[2]) == CONST_INT - && (INTVAL (operands[2]) & 0xff) == 0 - && i386_cc_probably_useless_p (insn)) +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +; See comments above addsi_3_imm for details. +(define_insn "*addqi_4" + [(set (reg 17) + (compare (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "n"))) + (clobber (match_scratch:QI 0 "=qm"))] + "ix86_match_ccmode (insn, CCGCmode) + && (INTVAL (operands[2]) & 0xff) != 0x80" +{ + switch (get_attr_type (insn)) { - int byteval = (INTVAL (operands[2]) >> 8) & 0xff; - CC_STATUS_INIT; - - if (byteval == 1) - return AS1 (inc%B0,%h0); - else if (byteval == 255) - return AS1 (dec%B0,%h0); + case TYPE_INCDEC: + if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return "inc{b}\t%0"; + else if (operands[2] == const1_rtx) + return "dec{b}\t%0"; + else + abort(); - operands[2] = GEN_INT (byteval); - return AS2 (add%B0,%2,%h0); + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + if (INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{b}\t{%2, %0|%0, %2}"; + } + return "sub{b}\t{%2, %0|%0, %2}"; } - - /* Use a 32-bit operation when possible, to avoid the prefix penalty. 
*/ - if (REG_P (operands[0]) - && i386_aligned_p (operands[2]) - && i386_cc_probably_useless_p (insn)) +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + + +(define_insn "*addqi_5" + [(set (reg 17) + (compare + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qmni")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" +{ + switch (get_attr_type (insn)) { - CC_STATUS_INIT; + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return "dec{b}\t%0"; + abort(); - if (GET_CODE (operands[2]) == CONST_INT) + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) < 0) { - HOST_WIDE_INT intval = 0xffff & INTVAL (operands[2]); - - if (intval == 1) - return AS1 (inc%L0,%k0); - - if (intval == 0xffff) - return AS1 (dec%L0,%k0); - - operands[2] = i386_sext16_if_const (operands[2]); + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%2, %0|%0, %2}"; } - return AS2 (add%L0,%k2,%k0); + return "add{b}\t{%2, %0|%0, %2}"; } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + + +(define_insn "addqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "Qmn"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case 
TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%h0"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return "dec{b}\t%h0"; + abort(); - if (operands[2] == const1_rtx) - return AS1 (inc%W0,%0); - - if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 65535)) - return AS1 (dec%W0,%0); - - return AS2 (add%W0,%2,%0); -}" - [(set_attr "type" "binary")]) - -(define_expand "addqi3" - [(set (match_operand:QI 0 "general_operand" "") - (plus:QI (match_operand:QI 1 "general_operand" "") - (match_operand:QI 2 "general_operand" "")))] - "" - "IX86_EXPAND_BINARY_OPERATOR (PLUS, QImode, operands);") - -(define_insn "" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,?q") - (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q") - (match_operand:QI 2 "general_operand" "qn,qmn,qn")))] - "ix86_binary_operator_ok (PLUS, QImode, operands)" - "* + default: + return "add{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "nonmemory_operand" "Qn"))) + (clobber (reg:CC 17))] + "TARGET_64BIT" { - if (REG_P (operands[0]) && REG_P (operands[1]) - && (REG_P (operands[2]) || CONSTANT_P (operands[2])) - && (REGNO (operands[0]) != REGNO (operands[1]) - || NON_QI_REG_P (operands[1]) - || (REG_P (operands[2]) && NON_QI_REG_P (operands[2])))) + switch (get_attr_type (insn)) { - if (operands[2] == stack_pointer_rtx) - abort (); + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%h0"; + else if (operands[2] == constm1_rtx + || 
(GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return "dec{b}\t%h0"; + abort(); - CC_STATUS_INIT; - operands[1] - = gen_rtx_PLUS (SImode, - gen_rtx_REG (SImode, REGNO (operands[1])), - (! REG_P (operands[2]) - ? operands[2] - : gen_rtx_REG (SImode, REGNO (operands[2])))); - operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); - return AS2 (lea%L0,%a1,%0); - } - if (operands[2] == const1_rtx) - return AS1 (inc%B0,%0); - - if (operands[2] == constm1_rtx - || (GET_CODE (operands[2]) == CONST_INT - && INTVAL (operands[2]) == 255)) - return AS1 (dec%B0,%0); - - return AS2 (add%B0,%2,%0); -}" - [(set_attr "type" "binary")]) - -;Lennart Augustsson <augustss@cs.chalmers.se> -;says this pattern just makes slower code: -; pushl %ebp -; addl $-80,(%esp) -;instead of -; leal -80(%ebp),%eax -; pushl %eax -; -;(define_insn "" -; [(set (match_operand:SI 0 "push_operand" "=<") -; (plus:SI (match_operand:SI 1 "register_operand" "%r") -; (match_operand:SI 2 "nonmemory_operand" "ri")))] -; "" -; "* -;{ -; rtx xops[4]; -; xops[0] = operands[0]; -; xops[1] = operands[1]; -; xops[2] = operands[2]; -; xops[3] = gen_rtx_MEM (SImode, stack_pointer_rtx); -; output_asm_insn (\"push%z1 %1\", xops); -; output_asm_insn (AS2 (add%z3,%2,%3), xops); -; RET; -;}") + default: + return "add{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC 17))] + "" + "add{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) ;; The patterns that match these are at 
the end of this file. @@ -3708,204 +6880,316 @@ [(set (match_operand:XF 0 "register_operand" "") (plus:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] + "!TARGET_64BIT && TARGET_80387" + "") + +(define_expand "addtf3" + [(set (match_operand:TF 0 "register_operand" "") + (plus:TF (match_operand:TF 1 "register_operand" "") + (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") (define_expand "adddf3" [(set (match_operand:DF 0 "register_operand" "") - (plus:DF (match_operand:DF 1 "nonimmediate_operand" "") + (plus:DF (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387" + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "") (define_expand "addsf3" [(set (match_operand:SF 0 "register_operand" "") - (plus:SF (match_operand:SF 1 "nonimmediate_operand" "") + (plus:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387" + "TARGET_80387 || TARGET_SSE_MATH" "") -;;- subtract instructions - -(define_insn "subsidi3" - [(set (match_operand:DI 0 "general_operand" "=&r,&ro,&r,!&r,o,o,!o") - (minus:DI (match_operand:DI 1 "general_operand" "0iF,0,roiF,roiF,riF,o,o") - (zero_extend:DI (match_operand:SI 2 "general_operand" "o,ri,ri,o,ri,i,r")))) - (clobber (match_scratch:SI 3 "=X,X,X,X,X,&r,&r"))] - "" - "* -{ - rtx low[3], high[3], xops[7]; - - CC_STATUS_INIT; - - split_di (operands, 2, low, high); - high[2] = const0_rtx; - low[2] = operands[2]; - - if (!rtx_equal_p (operands[0], operands[1])) - { - xops[0] = high[0]; - xops[1] = low[0]; - xops[2] = high[1]; - xops[3] = low[1]; +;; Subtract instructions - if (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - { - output_asm_insn (AS2 (mov%L1,%3,%1), xops); - output_asm_insn (AS2 (mov%L0,%2,%0), xops); - } - else - { - xops[4] = high[2]; - xops[5] = low[2]; - xops[6] = operands[3]; - output_asm_insn (AS2 (mov%L6,%3,%6), xops); - output_asm_insn (AS2 
(sub%L6,%5,%6), xops); - output_asm_insn (AS2 (mov%L1,%6,%1), xops); - output_asm_insn (AS2 (mov%L6,%2,%6), xops); - output_asm_insn (AS2 (sbb%L6,%4,%6), xops); - output_asm_insn (AS2 (mov%L0,%6,%0), xops); - RET; - } - } +;; %%% splits for subsidi3 - output_asm_insn (AS2 (sub%L0,%2,%0), low); - output_asm_insn (AS2 (sbb%L0,%2,%0), high); - cc_status.value1 = high[0]; - cc_status.flags = CC_NO_OVERFLOW; - - RET; -}" - [(set_attr "type" "binary")]) - -(define_insn "subdi3" - [(set (match_operand:DI 0 "general_operand" "=&r,&ro,o,o,!&r,!o") - (minus:DI (match_operand:DI 1 "general_operand" "0,0,0iF,or,roiF,roiF") - (match_operand:DI 2 "general_operand" "or,riF,or,iF,roiF,roiF"))) - (clobber (match_scratch:SI 3 "=X,X,&r,&r,X,&r"))] +(define_expand "subdi3" + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC 17))])] "" - "* -{ - rtx low[3], high[3], xops[7]; - - CC_STATUS_INIT; - - split_di (operands, 3, low, high); - - if (!rtx_equal_p (operands[0], operands[1])) - { - xops[0] = high[0]; - xops[1] = low[0]; - xops[2] = high[1]; - xops[3] = low[1]; - - if (GET_CODE (operands[0]) != MEM) - { - output_asm_insn (AS2 (mov%L1,%3,%1), xops); - output_asm_insn (AS2 (mov%L0,%2,%0), xops); - } - else - { - xops[4] = high[2]; - xops[5] = low[2]; - xops[6] = operands[3]; - output_asm_insn (AS2 (mov%L6,%3,%6), xops); - output_asm_insn (AS2 (sub%L6,%5,%6), xops); - output_asm_insn (AS2 (mov%L1,%6,%1), xops); - output_asm_insn (AS2 (mov%L6,%2,%6), xops); - output_asm_insn (AS2 (sbb%L6,%4,%6), xops); - output_asm_insn (AS2 (mov%L0,%6,%0), xops); - RET; - } - } - - cc_status.value1 = high[0]; - cc_status.flags = CC_NO_OVERFLOW; - - if (GET_CODE (operands[3]) == REG) - { - xops[0] = high[0]; - xops[1] = low[0]; - xops[2] = high[2]; - xops[3] = low[2]; - xops[4] = operands[3]; - - output_asm_insn (AS2 (mov%L4,%3,%4), xops); - output_asm_insn 
(AS2 (sub%L1,%4,%1), xops); - output_asm_insn (AS2 (mov%L4,%2,%4), xops); - output_asm_insn (AS2 (sbb%L0,%4,%0), xops); - } - - else if (GET_CODE (low[2]) != CONST_INT || INTVAL (low[2]) != 0) - { - output_asm_insn (AS2 (sub%L0,%2,%0), low); - output_asm_insn (AS2 (sbb%L0,%2,%0), high); - } - - else - output_asm_insn (AS2 (sub%L0,%2,%0), high); - + "ix86_expand_binary_operator (MINUS, DImode, operands); DONE;") + +(define_insn "*subdi3_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "general_operand" "roiF,riF"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "#") - RET; -}" - [(set_attr "type" "binary")]) +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC 17) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (minus:SI (match_dup 4) + (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC 17))])] + "split_di (operands+0, 1, operands+0, operands+3); + split_di (operands+1, 1, operands+1, operands+4); + split_di (operands+2, 1, operands+2, operands+5);") + +(define_insn "subdi3_carry_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (plus:DI (ltu:DI (reg:CC 17) (const_int 0)) + (match_operand:DI 2 "x86_64_general_operand" "re,rm")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sbb{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "ppro_uops" "few") + (set_attr "mode" "DI")]) + +(define_insn "*subdi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + 
(minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*subdi_2_rex64" + [(set (reg 17) + (compare + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*subdi_3_rex63" + [(set (reg 17) + (compare (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + + +(define_insn "subsi3_carry" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (match_operand:SI 2 "general_operand" "ri,rm")))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (MINUS, SImode, operands)" + "sbb{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "ppro_uops" "few") + (set_attr "mode" "SI")]) + +(define_insn "subsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=rm,r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0,0") + (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (match_operand:SI 2 "general_operand" "ri,rm"))))) + 
(clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sbb{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "ppro_uops" "few") + (set_attr "mode" "SI")]) (define_expand "subsi3" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (minus:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "general_operand" "")))] + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC 17))])] "" - "IX86_EXPAND_BINARY_OPERATOR (MINUS, SImode, operands);") + "ix86_expand_binary_operator (MINUS, SImode, operands); DONE;") -(define_insn "" +(define_insn "*subsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") - (match_operand:SI 2 "general_operand" "ri,rm")))] + (match_operand:SI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC 17))] "ix86_binary_operator_ok (MINUS, SImode, operands)" - "* return AS2 (sub%L0,%2,%0);" - [(set_attr "type" "binary")]) + "sub{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "rim")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_2" + [(set (reg 17) + (compare + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:SI 2 "general_operand" "ri,rm")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, SImode, 
operands)" + "sub{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_2_zext" + [(set (reg 17) + (compare + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_3" + [(set (reg 17) + (compare (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:SI 2 "general_operand" "ri,rm"))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_3_zext" + [(set (reg 17) + (compare (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "rim"))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) (define_expand "subhi3" - [(set (match_operand:HI 0 "general_operand" "") - (minus:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "general_operand" "")))] - "" - "IX86_EXPAND_BINARY_OPERATOR (MINUS, HImode, operands);") - -(define_insn "" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator 
(MINUS, HImode, operands); DONE;") + +(define_insn "*subhi_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") - (match_operand:HI 2 "general_operand" "ri,rm")))] + (match_operand:HI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC 17))] "ix86_binary_operator_ok (MINUS, HImode, operands)" - "* -{ - if (REG_P (operands[0]) - && i386_aligned_p (operands[2]) - && i386_cc_probably_useless_p (insn)) - { - CC_STATUS_INIT; - operands[2] = i386_sext16_if_const (operands[2]); - return AS2 (sub%L0,%k2,%k0); - } - return AS2 (sub%W0,%2,%0); -}" - [(set_attr "type" "binary")]) + "sub{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*subhi_2" + [(set (reg 17) + (compare + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:HI 2 "general_operand" "ri,rm")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, HImode, operands)" + "sub{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*subhi_3" + [(set (reg 17) + (compare (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:HI 2 "general_operand" "ri,rm"))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, HImode, operands)" + "sub{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) (define_expand "subqi3" - [(set (match_operand:QI 0 "general_operand" "") - (minus:QI (match_operand:QI 1 "general_operand" "") - (match_operand:QI 2 "general_operand" "")))] - "" - "IX86_EXPAND_BINARY_OPERATOR (MINUS, QImode, operands);") - -(define_insn "" + [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" 
"") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (MINUS, QImode, operands); DONE;") + +(define_insn "*subqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") - (match_operand:QI 2 "general_operand" "qn,qmn")))] + (match_operand:QI 2 "general_operand" "qn,qmn"))) + (clobber (reg:CC 17))] "ix86_binary_operator_ok (MINUS, QImode, operands)" - "* return AS2 (sub%B0,%2,%0);" - [(set_attr "type" "binary")]) + "sub{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*subqi_2" + [(set (reg 17) + (compare + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "general_operand" "qi,qm")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q") + (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, QImode, operands)" + "sub{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*subqi_3" + [(set (reg 17) + (compare (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "general_operand" "qi,qm"))) + (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q") + (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, QImode, operands)" + "sub{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) ;; The patterns that match these are at the end of this file. 
@@ -3913,115 +7197,325 @@ [(set (match_operand:XF 0 "register_operand" "") (minus:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] + "!TARGET_64BIT && TARGET_80387" + "") + +(define_expand "subtf3" + [(set (match_operand:TF 0 "register_operand" "") + (minus:TF (match_operand:TF 1 "register_operand" "") + (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") (define_expand "subdf3" [(set (match_operand:DF 0 "register_operand" "") - (minus:DF (match_operand:DF 1 "nonimmediate_operand" "") + (minus:DF (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387" + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "") (define_expand "subsf3" [(set (match_operand:SF 0 "register_operand" "") - (minus:SF (match_operand:SF 1 "nonimmediate_operand" "") + (minus:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387" + "TARGET_80387 || TARGET_SSE_MATH" "") -;;- multiply instructions - -;(define_insn "mulqi3" -; [(set (match_operand:QI 0 "register_operand" "=a") -; (mult:QI (match_operand:QI 1 "register_operand" "%0") -; (match_operand:QI 2 "nonimmediate_operand" "qm")))] -; "" -; "imul%B0 %2,%0") +;; Multiply instructions + +(define_expand "muldi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_64BIT" + "") -(define_insn "mulhi3" - [(set (match_operand:HI 0 "register_operand" "=r,r") - (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%0,rm") - (match_operand:HI 2 "general_operand" "g,i")))] +(define_insn "*muldi3_1_rex64" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,0,0") + (match_operand:DI 2 "x86_64_general_operand" "K,e,mr"))) + (clobber (reg:CC 17))] + "TARGET_64BIT + && (GET_CODE 
(operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "@ + imul{q}\t{%2, %1, %0|%0, %1, %2} + imul{q}\t{%2, %1, %0|%0, %1, %2} + imul{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set_attr "mode" "DI")]) + +(define_expand "mulsi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC 17))])] "" - "* -{ - if (GET_CODE (operands[1]) == REG - && REGNO (operands[1]) == REGNO (operands[0]) - && (GET_CODE (operands[2]) == MEM || GET_CODE (operands[2]) == REG)) - /* Assembler has weird restrictions. */ - return AS2 (imul%W0,%2,%0); - return AS3 (imul%W0,%2,%1,%0); -}" - [(set_attr "type" "imul")]) + "") -(define_insn "mulsi3" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm") - (match_operand:SI 2 "general_operand" "g,i")))] - "" - "* -{ - if (GET_CODE (operands[1]) == REG - && REGNO (operands[1]) == REGNO (operands[0]) - && (GET_CODE (operands[2]) == MEM || GET_CODE (operands[2]) == REG)) - /* Assembler has weird restrictions. */ - return AS2 (imul%L0,%2,%0); - return AS3 (imul%L0,%2,%1,%0); -}" - [(set_attr "type" "imul")]) +(define_insn "*mulsi3_1" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,0,0") + (match_operand:SI 2 "general_operand" "K,i,mr"))) + (clobber (reg:CC 17))] + "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" + ; For the {r,0,i} alternative (i.e., register <- register * immediate), + ; there are two ways of writing the exact same machine instruction + ; in assembly language. One, for example, is: + ; + ; imul $12, %eax + ; + ; while the other is: + ; + ; imul $12, %eax, %eax + ; + ; The first is simply short-hand for the latter. But, some assemblers, + ; like the SCO OSR5 COFF assembler, don't handle the first form. 
+ "@ + imul{l}\t{%2, %1, %0|%0, %1, %2} + imul{l}\t{%2, %1, %0|%0, %1, %2} + imul{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set_attr "mode" "SI")]) + +(define_insn "*mulsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,0,0") + (match_operand:SI 2 "general_operand" "K,i,mr")))) + (clobber (reg:CC 17))] + "TARGET_64BIT + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + ; For the {r,0,i} alternative (i.e., register <- register * immediate), + ; there are two ways of writing the exact same machine instruction + ; in assembly language. One, for example, is: + ; + ; imul $12, %eax + ; + ; while the other is: + ; + ; imul $12, %eax, %eax + ; + ; The first is simply short-hand for the latter. But, some assemblers, + ; like the SCO OSR5 COFF assembler, don't handle the first form. + "@ + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set_attr "mode" "SI")]) + +(define_expand "mulhi3" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_HIMODE_MATH" + "") + +(define_insn "*mulhi3_1" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,0,0") + (match_operand:HI 2 "general_operand" "K,i,mr"))) + (clobber (reg:CC 17))] + "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" + ; %%% There was a note about "Assembler has weird restrictions", + ; concerning alternative 1 when op1 == op0. True? 
+ "@ + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set_attr "mode" "HI")]) + +(define_insn "mulqi3" + [(set (match_operand:QI 0 "register_operand" "=a") + (mult:QI (match_operand:QI 1 "register_operand" "%0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "mul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) (define_insn "umulqihi3" [(set (match_operand:HI 0 "register_operand" "=a") (mult:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "%0")) - (zero_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))] - "" - "mul%B0 %2" - [(set_attr "type" "imul")]) + (zero_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm")))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "mul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) (define_insn "mulqihi3" [(set (match_operand:HI 0 "register_operand" "=a") (mult:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "%0")) - (sign_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))] - "" - "imul%B0 %2" - [(set_attr "type" "imul")]) - + (sign_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm")))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "imul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "umulditi3" + [(set (match_operand:TI 0 "register_operand" "=A") + (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "%0")) + (zero_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "mul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "ppro_uops" "few") + (set_attr "length_immediate" "0") + (set_attr "mode" "DI")]) + +;; We can't use this pattern in 64bit mode, since it results in two separate 
32bit registers (define_insn "umulsidi3" [(set (match_operand:DI 0 "register_operand" "=A") (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) - (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))] - "TARGET_WIDE_MULTIPLY" - "mul%L0 %2" - [(set_attr "type" "imul")]) + (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "ppro_uops" "few") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) + +(define_insn "mulditi3" + [(set (match_operand:TI 0 "register_operand" "=A") + (mult:TI (sign_extend:TI (match_operand:DI 1 "register_operand" "%0")) + (sign_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "imul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set_attr "mode" "DI")]) (define_insn "mulsidi3" [(set (match_operand:DI 0 "register_operand" "=A") (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%0")) - (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))] - "TARGET_WIDE_MULTIPLY" - "imul%L0 %2" - [(set_attr "type" "imul")]) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "imul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) + +(define_insn "*umuldi3_highpart_rex64" + [(set (match_operand:DI 0 "register_operand" "=d") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand" "%a")) + (zero_extend:TI + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "=a")) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "mul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "ppro_uops" "few") + (set_attr "length_immediate" "0") + (set_attr "mode" "DI")]) (define_insn "umulsi3_highpart" [(set (match_operand:SI 0 
"register_operand" "=d") - (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%a")) - (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))) - (const_int 32)))) - (clobber (match_scratch:SI 3 "=a"))] - "TARGET_WIDE_MULTIPLY" - "mul%L0 %2" - [(set_attr "type" "imul")]) + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "%a")) + (zero_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=a")) + (clobber (reg:CC 17))] + "" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "ppro_uops" "few") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) + +(define_insn "*umulsi3_highpart_zext" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "%a")) + (zero_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32))))) + (clobber (match_scratch:SI 3 "=a")) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "ppro_uops" "few") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) + +(define_insn "*smuldi3_highpart_rex64" + [(set (match_operand:DI 0 "register_operand" "=d") + (truncate:DI + (lshiftrt:TI + (mult:TI (sign_extend:TI + (match_operand:DI 1 "register_operand" "%a")) + (sign_extend:TI + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "=a")) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "imul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "ppro_uops" "few") + (set_attr "mode" "DI")]) (define_insn "smulsi3_highpart" [(set (match_operand:SI 0 "register_operand" "=d") - (truncate:SI (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%a")) - (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))) - (const_int 32)))) - 
(clobber (match_scratch:SI 3 "=a"))] - "TARGET_WIDE_MULTIPLY" - "imul%L0 %2" - [(set_attr "type" "imul")]) + (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "register_operand" "%a")) + (sign_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=a")) + (clobber (reg:CC 17))] + "" + "imul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "ppro_uops" "few") + (set_attr "mode" "SI")]) + +(define_insn "*smulsi3_highpart_zext" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "register_operand" "%a")) + (sign_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32))))) + (clobber (match_scratch:SI 3 "=a")) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "imul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "ppro_uops" "few") + (set_attr "mode" "SI")]) ;; The patterns that match these are at the end of this file. 
@@ -4029,6 +7523,13 @@ [(set (match_operand:XF 0 "register_operand" "") (mult:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] + "!TARGET_64BIT && TARGET_80387" + "") + +(define_expand "multf3" + [(set (match_operand:TF 0 "register_operand" "") + (mult:TF (match_operand:TF 1 "register_operand" "") + (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") @@ -4036,32 +7537,39 @@ [(set (match_operand:DF 0 "register_operand" "") (mult:DF (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387" + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "") (define_expand "mulsf3" [(set (match_operand:SF 0 "register_operand" "") (mult:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387" + "TARGET_80387 || TARGET_SSE_MATH" "") -;;- divide instructions +;; Divide instructions (define_insn "divqi3" [(set (match_operand:QI 0 "register_operand" "=a") (div:QI (match_operand:HI 1 "register_operand" "0") - (match_operand:QI 2 "nonimmediate_operand" "qm")))] - "" - "idiv%B0 %2") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "idiv{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI") + (set_attr "ppro_uops" "few")]) (define_insn "udivqi3" [(set (match_operand:QI 0 "register_operand" "=a") (udiv:QI (match_operand:HI 1 "register_operand" "0") - (match_operand:QI 2 "nonimmediate_operand" "qm")))] - "" - "div%B0 %2" - [(set_attr "type" "idiv")]) + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "div{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI") + (set_attr "ppro_uops" "few")]) ;; The patterns that match these are at the end of this file. 
@@ -4069,6 +7577,13 @@ [(set (match_operand:XF 0 "register_operand" "") (div:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] + "!TARGET_64BIT && TARGET_80387" + "") + +(define_expand "divtf3" + [(set (match_operand:TF 0 "register_operand" "") + (div:TF (match_operand:TF 1 "register_operand" "") + (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") @@ -4076,2811 +7591,5939 @@ [(set (match_operand:DF 0 "register_operand" "") (div:DF (match_operand:DF 1 "register_operand" "") (match_operand:DF 2 "nonimmediate_operand" "")))] - "TARGET_80387" + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "") (define_expand "divsf3" [(set (match_operand:SF 0 "register_operand" "") (div:SF (match_operand:SF 1 "register_operand" "") (match_operand:SF 2 "nonimmediate_operand" "")))] - "TARGET_80387" + "TARGET_80387 || TARGET_SSE_MATH" "") ;; Remainder instructions. -(define_insn "divmodsi4" +(define_expand "divmoddi4" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (div:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonimmediate_operand" ""))) + (set (match_operand:DI 3 "register_operand" "") + (mod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))])] + "TARGET_64BIT" + "") + +;; Allow the parameter to arrive in eax or edx to avoid extra moves. +;; Penalize the eax case slightly because it results in worse scheduling +;; of code.
+(define_insn "*divmoddi4_nocltd_rex64" + [(set (match_operand:DI 0 "register_operand" "=&a,?a") + (div:DI (match_operand:DI 2 "register_operand" "1,0") + (match_operand:DI 3 "nonimmediate_operand" "rm,rm"))) + (set (match_operand:DI 1 "register_operand" "=&d,&d") + (mod:DI (match_dup 2) (match_dup 3))) + (clobber (reg:CC 17))] + "TARGET_64BIT && !optimize_size && !TARGET_USE_CLTD" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmoddi4_cltd_rex64" + [(set (match_operand:DI 0 "register_operand" "=a") + (div:DI (match_operand:DI 2 "register_operand" "a") + (match_operand:DI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 1 "register_operand" "=&d") + (mod:DI (match_dup 2) (match_dup 3))) + (clobber (reg:CC 17))] + "TARGET_64BIT && (optimize_size || TARGET_USE_CLTD)" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmoddi_noext_rex64" + [(set (match_operand:DI 0 "register_operand" "=a") + (div:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 3 "register_operand" "=d") + (mod:DI (match_dup 1) (match_dup 2))) + (use (match_operand:DI 4 "register_operand" "3")) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "idiv{q}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "DI") + (set_attr "ppro_uops" "few")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (div:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonimmediate_operand" ""))) + (set (match_operand:DI 3 "register_operand" "") + (mod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (match_dup 3) + (ashiftrt:DI (match_dup 4) (const_int 63))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 0) + (div:DI (reg:DI 0) (match_dup 2))) + (set (match_dup 3) + (mod:DI (reg:DI 0) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC 17))])] +{ + /* Avoid use of cltd in favour of a mov+shift. 
*/ + if (!TARGET_USE_CLTD && !optimize_size) + { + if (true_regnum (operands[1])) + emit_move_insn (operands[0], operands[1]); + else + emit_move_insn (operands[3], operands[1]); + operands[4] = operands[3]; + } + else + { + if (true_regnum (operands[1])) + abort(); + operands[4] = operands[1]; + } +}) + + +(define_expand "divmodsi4" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonimmediate_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))])] + "" + "") + +;; Allow the parameter to arrive in eax or edx to avoid extra moves. +;; Penalize the eax case slightly because it results in worse scheduling +;; of code. +(define_insn "*divmodsi4_nocltd" + [(set (match_operand:SI 0 "register_operand" "=&a,?a") + (div:SI (match_operand:SI 2 "register_operand" "1,0") + (match_operand:SI 3 "nonimmediate_operand" "rm,rm"))) + (set (match_operand:SI 1 "register_operand" "=&d,&d") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC 17))] + "!optimize_size && !TARGET_USE_CLTD" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmodsi4_cltd" + [(set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_operand:SI 2 "register_operand" "a") + (match_operand:SI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 1 "register_operand" "=&d") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC 17))] + "optimize_size || TARGET_USE_CLTD" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmodsi_noext" [(set (match_operand:SI 0 "register_operand" "=a") (div:SI (match_operand:SI 1 "register_operand" "0") (match_operand:SI 2 "nonimmediate_operand" "rm"))) - (set (match_operand:SI 3 "register_operand" "=&d") - (mod:SI (match_dup 1) (match_dup 2)))] + (set (match_operand:SI 3 "register_operand" "=d") + (mod:SI (match_dup 1) (match_dup 2))) + (use (match_operand:SI 4 "register_operand" "3")) +
(clobber (reg:CC 17))] "" - "* -{ -#ifdef INTEL_SYNTAX - output_asm_insn (\"cdq\", operands); -#else - output_asm_insn (\"cltd\", operands); -#endif - return AS1 (idiv%L0,%2); -}" - [(set_attr "type" "idiv")]) + "idiv{l}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "SI") + (set_attr "ppro_uops" "few")]) +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonimmediate_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))] + "reload_completed" + [(parallel [(set (match_dup 3) + (ashiftrt:SI (match_dup 4) (const_int 31))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 0) + (div:SI (reg:SI 0) (match_dup 2))) + (set (match_dup 3) + (mod:SI (reg:SI 0) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC 17))])] +{ + /* Avoid use of cltd in favour of a mov+shift. */ + if (!TARGET_USE_CLTD && !optimize_size) + { + if (true_regnum (operands[1])) + emit_move_insn (operands[0], operands[1]); + else + emit_move_insn (operands[3], operands[1]); + operands[4] = operands[3]; + } + else + { + if (true_regnum (operands[1])) + abort(); + operands[4] = operands[1]; + } +}) +;; %%% Split me. 
(define_insn "divmodhi4" [(set (match_operand:HI 0 "register_operand" "=a") (div:HI (match_operand:HI 1 "register_operand" "0") (match_operand:HI 2 "nonimmediate_operand" "rm"))) (set (match_operand:HI 3 "register_operand" "=&d") - (mod:HI (match_dup 1) (match_dup 2)))] - "" - "cwtd\;idiv%W0 %2" - [(set_attr "type" "idiv")]) + (mod:HI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_HIMODE_MATH" + "cwtd\;idiv{w}\t%2" + [(set_attr "type" "multi") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) + +(define_insn "udivmoddi4" + [(set (match_operand:DI 0 "register_operand" "=a") + (udiv:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 3 "register_operand" "=&d") + (umod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "xor{q}\t%3, %3\;div{q}\t%2" + [(set_attr "type" "multi") + (set_attr "length_immediate" "0") + (set_attr "mode" "DI")]) + +(define_insn "*udivmoddi4_noext" + [(set (match_operand:DI 0 "register_operand" "=a") + (udiv:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 3 "register_operand" "=d") + (umod:DI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "div{q}\t%2" + [(set_attr "type" "idiv") + (set_attr "ppro_uops" "few") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (udiv:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonimmediate_operand" ""))) + (set (match_operand:DI 3 "register_operand" "") + (umod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_64BIT && reload_completed" + [(set (match_dup 3) (const_int 0)) + (parallel [(set (match_dup 0) + (udiv:DI (match_dup 1) (match_dup 2))) + (set (match_dup 3) + (umod:DI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC 17))])] + "") -;; ??? 
Can we make gcc zero extend operand[0]? (define_insn "udivmodsi4" [(set (match_operand:SI 0 "register_operand" "=a") (udiv:SI (match_operand:SI 1 "register_operand" "0") (match_operand:SI 2 "nonimmediate_operand" "rm"))) (set (match_operand:SI 3 "register_operand" "=&d") - (umod:SI (match_dup 1) (match_dup 2)))] - "" - "* -{ - output_asm_insn (AS2 (xor%L3,%3,%3), operands); - return AS1 (div%L0,%2); -}" - [(set_attr "type" "idiv")]) - -;; ??? Can we make gcc zero extend operand[0]? -(define_insn "udivmodhi4" - [(set (match_operand:HI 0 "register_operand" "=a") - (udiv:HI (match_operand:HI 1 "register_operand" "0") - (match_operand:HI 2 "nonimmediate_operand" "rm"))) - (set (match_operand:HI 3 "register_operand" "=&d") - (umod:HI (match_dup 1) (match_dup 2)))] + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))] "" - "* -{ - output_asm_insn (AS2 (xor%W0,%3,%3), operands); - return AS1 (div%W0,%2); -}" - [(set_attr "type" "idiv")]) - -/* -;;this should be a valid double division which we may want to add + "xor{l}\t%3, %3\;div{l}\t%2" + [(set_attr "type" "multi") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) -(define_insn "" +(define_insn "*udivmodsi4_noext" [(set (match_operand:SI 0 "register_operand" "=a") - (udiv:DI (match_operand:DI 1 "register_operand" "a") + (udiv:SI (match_operand:SI 1 "register_operand" "0") (match_operand:SI 2 "nonimmediate_operand" "rm"))) (set (match_operand:SI 3 "register_operand" "=d") - (umod:SI (match_dup 1) (match_dup 2)))] + (umod:SI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC 17))] "" - "div%L0 %2,%0" - [(set_attr "type" "idiv")]) -*/ - -;;- and instructions + "div{l}\t%2" + [(set_attr "type" "idiv") + (set_attr "ppro_uops" "few") + (set_attr "mode" "SI")]) -;; On i386, -;; movzbl %bl,%ebx -;; is faster than -;; andl $255,%ebx -;; -;; but if the reg is %eax, then the "andl" is faster. -;; -;; On i486, the "andl" is always faster than the "movzbl". 
-;; -;; On both i386 and i486, a three operand AND is as fast with movzbl or -;; movzwl as with andl, if operands[0] != operands[1]. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonimmediate_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))] + "reload_completed" + [(set (match_dup 3) (const_int 0)) + (parallel [(set (match_dup 0) + (udiv:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) + (umod:SI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC 17))])] + "") -;; The `r' in `rm' for operand 3 looks redundant, but it causes -;; optional reloads to be generated if op 3 is a pseudo in a stack slot. +(define_expand "udivmodhi4" + [(set (match_dup 4) (const_int 0)) + (parallel [(set (match_operand:HI 0 "register_operand" "") + (udiv:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "nonimmediate_operand" ""))) + (set (match_operand:HI 3 "register_operand" "") + (umod:HI (match_dup 1) (match_dup 2))) + (use (match_dup 4)) + (clobber (reg:CC 17))])] + "TARGET_HIMODE_MATH" + "operands[4] = gen_reg_rtx (HImode);") + +(define_insn "*udivmodhi_noext" + [(set (match_operand:HI 0 "register_operand" "=a") + (udiv:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:HI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:HI 3 "register_operand" "=d") + (umod:HI (match_dup 1) (match_dup 2))) + (use (match_operand:HI 4 "register_operand" "3")) + (clobber (reg:CC 17))] + "" + "div{w}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "HI") + (set_attr "ppro_uops" "few")]) + +;; We can not use div/idiv for double division, because it causes +;; "division by zero" on the overflow and that's not what we expect +;; from truncate. Because true (non truncating) double division is +;; never generated, we can't create this insn anyway. 
+; +;(define_insn "" +; [(set (match_operand:SI 0 "register_operand" "=a") +; (truncate:SI +; (udiv:DI (match_operand:DI 1 "register_operand" "A") +; (zero_extend:DI +; (match_operand:SI 2 "nonimmediate_operand" "rm"))))) +; (set (match_operand:SI 3 "register_operand" "=d") +; (truncate:SI +; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2))))) +; (clobber (reg:CC 17))] +; "" +; "div{l}\t{%2, %0|%0, %2}" +; [(set_attr "type" "idiv") +; (set_attr "ppro_uops" "few")]) + +;;- Logical AND instructions + +;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al. +;; Note that this excludes ah. + +(define_insn "*testdi_1_rex64" + [(set (reg 17) + (compare + (and:DI (match_operand:DI 0 "nonimmediate_operand" "%*a,r,*a,r,rm") + (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,Z,e,e,re")) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "@ + test{l}\t{%k1, %k0|%k0, %k1} + test{l}\t{%k1, %k0|%k0, %k1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,0,1,1") + (set_attr "mode" "SI,SI,DI,DI,DI") + (set_attr "pent_pair" "uv,np,uv,np,uv")]) + +(define_insn "testsi_1" + [(set (reg 17) + (compare + (and:SI (match_operand:SI 0 "nonimmediate_operand" "%*a,r,rm") + (match_operand:SI 1 "nonmemory_operand" "in,in,rin")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "SI") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testsi_ccno_1" + [(set (reg:CCNO 17) + (compare:CCNO + (and:SI (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "nonmemory_operand" "")) + (const_int 0)))] + "" + "") -(define_insn "andsi3" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "ri,rm")))] +(define_insn "*testhi_1" + [(set 
(reg 17) + (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%*a,r,rm") + (match_operand:HI 1 "nonmemory_operand" "n,n,rn")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "HI") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testqi_ccz_1" + [(set (reg:CCZ 17) + (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "nonmemory_operand" "")) + (const_int 0)))] "" - "* + "") + +(define_insn "*testqi_1" + [(set (reg 17) + (compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "%*a,q,qm,r") + (match_operand:QI 1 "nonmemory_operand" "n,n,qn,n")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" { - HOST_WIDE_INT intval; - if (!rtx_equal_p (operands[0], operands[1]) - && rtx_equal_p (operands[0], operands[2])) + if (which_alternative == 3) { - rtx tmp; - tmp = operands[1]; - operands[1] = operands[2]; - operands[2] = tmp; + if (GET_CODE (operands[1]) == CONST_INT + && (INTVAL (operands[1]) & 0xffffff00)) + operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); + return "test{l}\t{%1, %k0|%k0, %1}"; } - switch (GET_CODE (operands[2])) - { - case CONST_INT: - if (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])) - break; - intval = INTVAL (operands[2]); - /* zero-extend 16->32? */ - if (intval == 0xffff && REG_P (operands[0]) - && (! REG_P (operands[1]) - || REGNO (operands[0]) != 0 || REGNO (operands[1]) != 0) - && (!TARGET_ZERO_EXTEND_WITH_AND || ! rtx_equal_p (operands[0], operands[1]))) - { - /* ??? tege: Should forget CC_STATUS only if we clobber a - remembered operand. Fix that later. */ - CC_STATUS_INIT; -#ifdef INTEL_SYNTAX - return AS2 (movzx,%w1,%0); -#else - return AS2 (movz%W0%L0,%w1,%0); -#endif - } - - /* zero extend 8->32? */ - if (intval == 0xff && REG_P (operands[0]) - && !(REG_P (operands[1]) && NON_QI_REG_P (operands[1])) - && (! 
REG_P (operands[1]) - || REGNO (operands[0]) != 0 || REGNO (operands[1]) != 0) - && (!TARGET_ZERO_EXTEND_WITH_AND || ! rtx_equal_p (operands[0], operands[1]))) - { - /* ??? tege: Should forget CC_STATUS only if we clobber a - remembered operand. Fix that later. */ - CC_STATUS_INIT; -#ifdef INTEL_SYNTAX - return AS2 (movzx,%b1,%0); -#else - return AS2 (movz%B0%L0,%b1,%0); -#endif - } - - /* Check partial bytes.. non-QI-regs are not available */ - if (REG_P (operands[0]) && ! QI_REG_P (operands[0])) - break; - - /* only low byte has zero bits? */ - if (~(intval | 0xff) == 0) - { - intval &= 0xff; - if (REG_P (operands[0])) - { - if (intval == 0) - { - CC_STATUS_INIT; - return AS2 (xor%B0,%b0,%b0); - } - - /* we're better off with the 32-bit version if reg != EAX */ - /* the value is sign-extended in 8 bits */ - if (REGNO (operands[0]) != 0 && (intval & 0x80)) - break; - } - - CC_STATUS_INIT; + return "test{b}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1,1") + (set_attr "mode" "QI,QI,QI,SI") + (set_attr "pent_pair" "uv,np,uv,np")]) + +(define_expand "testqi_ext_ccno_0" + [(set (reg:CCNO 17) + (compare:CCNO + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "")) + (const_int 0)))] + "" + "") - operands[2] = GEN_INT (intval); +(define_insn "*testqi_ext_0" + [(set (reg 17) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "n")) + (const_int 0)))] + "(unsigned HOST_WIDE_INT) INTVAL (operands[1]) <= 0xff + && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI") + (set_attr "length_immediate" "1") + (set_attr "pent_pair" "np")]) + +(define_insn "*testqi_ext_1" + [(set (reg 17) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 
8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 1 "nonimmediate_operand" "Qm"))) + (const_int 0)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +(define_insn "*testqi_ext_1_rex64" + [(set (reg 17) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 1 "register_operand" "Q"))) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +(define_insn "*testqi_ext_2" + [(set (reg 17) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8))) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +;; Combine likes to form bit extractions for some tests. Humor it. 
+(define_insn "*testqi_ext_3" + [(set (reg 17) + (compare (zero_extract:SI + (match_operand 0 "nonimmediate_operand" "rm") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode) + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == QImode)" + "#") - if (intval == 0) - return AS2 (mov%B0,%2,%b0); +(define_insn "*testqi_ext_3_rex64" + [(set (reg 17) + (compare (zero_extract:DI + (match_operand 0 "nonimmediate_operand" "rm") + (match_operand:DI 1 "const_int_operand" "") + (match_operand:DI 2 "const_int_operand" "")) + (const_int 0)))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + /* The code below cannot deal with constants outside HOST_WIDE_INT. */ + && INTVAL (operands[1]) + INTVAL (operands[2]) < HOST_BITS_PER_WIDE_INT + /* Ensure that resulting mask is zero or sign extended operand. */ + && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 + || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64 + && INTVAL (operands[1]) > 32)) + && (GET_MODE (operands[0]) == SImode + || GET_MODE (operands[0]) == DImode + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == QImode)" + "#") - return AS2 (and%B0,%2,%b0); - } +(define_split + [(set (reg 17) + (compare (zero_extract + (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "const_int_operand" "") + (match_operand 2 "const_int_operand" "")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(set (reg:CCNO 17) (compare:CCNO (match_dup 3) (const_int 0)))] +{ + HOST_WIDE_INT len = INTVAL (operands[1]); + HOST_WIDE_INT pos = INTVAL (operands[2]); + HOST_WIDE_INT mask; + enum machine_mode mode, submode; - /* only second byte has zero? */ - if (~(intval | 0xff00) == 0) + mode = GET_MODE (operands[0]); + if (GET_CODE (operands[0]) == MEM) + { + /* ??? 
Combine likes to put non-volatile mem extractions in QImode + no matter the size of the test. So find a mode that works. */ + if (! MEM_VOLATILE_P (operands[0])) { - CC_STATUS_INIT; - - intval = (intval >> 8) & 0xff; - operands[2] = GEN_INT (intval); - if (intval == 0) - { - if (REG_P (operands[0])) - return AS2 (xor%B0,%h0,%h0); - operands[0] = adj_offsettable_operand (operands[0], 1); - return AS2 (mov%B0,%2,%b0); - } - - if (REG_P (operands[0])) - return AS2 (and%B0,%2,%h0); - - operands[0] = adj_offsettable_operand (operands[0], 1); - return AS2 (and%B0,%2,%b0); + mode = smallest_mode_for_size (pos + len, MODE_INT); + operands[0] = adjust_address (operands[0], mode, 0); } + } + else if (GET_CODE (operands[0]) == SUBREG + && (submode = GET_MODE (SUBREG_REG (operands[0])), + GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)) + && pos + len <= GET_MODE_BITSIZE (submode)) + { + /* Narrow a paradoxical subreg to prevent partial register stalls. */ + mode = submode; + operands[0] = SUBREG_REG (operands[0]); + } + else if (mode == HImode && pos + len <= 8) + { + /* Small HImode tests can be converted to QImode. */ + mode = QImode; + operands[0] = gen_lowpart (QImode, operands[0]); + } - if (REG_P (operands[0])) - break; + mask = ((HOST_WIDE_INT)1 << (pos + len)) - 1; + mask &= ~(((HOST_WIDE_INT)1 << pos) - 1); - /* third byte has zero bits? */ - if (~(intval | 0xff0000) == 0) - { - intval = (intval >> 16) & 0xff; - operands[0] = adj_offsettable_operand (operands[0], 2); -byte_and_operation: - CC_STATUS_INIT; - operands[2] = GEN_INT (intval); - if (intval == 0) - return AS2 (mov%B0,%2,%b0); - return AS2 (and%B0,%2,%b0); - } + operands[3] = gen_rtx_AND (mode, operands[0], + GEN_INT (trunc_int_for_mode (mask, mode))); +}) - /* fourth byte has zero bits? 
*/ - if (~(intval | 0xff000000) == 0) - { - intval = (intval >> 24) & 0xff; - operands[0] = adj_offsettable_operand (operands[0], 3); - goto byte_and_operation; - } +;; %%% This used to optimize known byte-wide and operations to memory, +;; and sometimes to QImode registers. If this is considered useful, +;; it should be done with splitters. - /* Low word is zero? */ - if (intval == 0xffff0000) - { -word_zero_and_operation: - CC_STATUS_INIT; - operands[2] = const0_rtx; - return AS2 (mov%W0,%2,%w0); - } +(define_expand "anddi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (and:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_szext_general_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "ix86_expand_binary_operator (AND, DImode, operands); DONE;") + +(define_insn "*anddi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + { + enum machine_mode mode; - /* High word is zero? */ - if (intval == 0x0000ffff) - { - operands[0] = adj_offsettable_operand (operands[0], 2); - goto word_zero_and_operation; - } + if (GET_CODE (operands[2]) != CONST_INT) + abort (); + if (INTVAL (operands[2]) == 0xff) + mode = QImode; + else if (INTVAL (operands[2]) == 0xffff) + mode = HImode; + else + abort (); + + operands[1] = gen_lowpart (mode, operands[1]); + if (mode == QImode) + return "movz{bq|x}\t{%1,%0|%0, %1}"; + else + return "movz{wq|x}\t{%1,%0|%0, %1}"; + } default: - break; + if (! 
rtx_equal_p (operands[0], operands[1])) + abort (); + if (get_attr_mode (insn) == MODE_SI) + return "and{l}\t{%k2, %k0|%k0, %k2}"; + else + return "and{q}\t{%2, %0|%0, %2}"; } +} + [(set_attr "type" "alu,alu,alu,imovx") + (set_attr "length_immediate" "*,*,*,0") + (set_attr "mode" "SI,DI,DI,DI")]) + +(define_insn "*anddi_2" + [(set (reg 17) + (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm") + (and:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, DImode, operands)" + "@ + and{l}\t{%k2, %k0|%k0, %k2} + and{q}\t{%2, %0|%0, %2} + and{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI,DI,DI")]) - return AS2 (and%L0,%2,%0); -}" - [(set_attr "type" "binary")]) - -(define_insn "andhi3" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") - (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "ri,rm")))] +(define_expand "andsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC 17))] "" - "* + "ix86_expand_binary_operator (AND, SImode, operands); DONE;") + +(define_insn "*andsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") + (match_operand:SI 2 "general_operand" "ri,rm,L"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (AND, SImode, operands)" { - if (GET_CODE (operands[2]) == CONST_INT - && ! (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))) + switch (get_attr_type (insn)) { - /* Can we ignore the upper byte? */ - if ((! 
REG_P (operands[0]) || QI_REG_P (operands[0])) - && (INTVAL (operands[2]) & 0xff00) == 0xff00) - { - CC_STATUS_INIT; + case TYPE_IMOVX: + { + enum machine_mode mode; - if ((INTVAL (operands[2]) & 0xff) == 0) - { - operands[2] = const0_rtx; - return AS2 (mov%B0,%2,%b0); - } + if (GET_CODE (operands[2]) != CONST_INT) + abort (); + if (INTVAL (operands[2]) == 0xff) + mode = QImode; + else if (INTVAL (operands[2]) == 0xffff) + mode = HImode; + else + abort (); + + operands[1] = gen_lowpart (mode, operands[1]); + if (mode == QImode) + return "movz{bl|x}\t{%1,%0|%0, %1}"; + else + return "movz{wl|x}\t{%1,%0|%0, %1}"; + } - operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff); - return AS2 (and%B0,%2,%b0); - } + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + return "and{l}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,imovx") + (set_attr "length_immediate" "*,*,0") + (set_attr "mode" "SI")]) - /* Can we ignore the lower byte? */ - /* ??? what about offsettable memory references? 
*/ - if (QI_REG_P (operands[0]) && (INTVAL (operands[2]) & 0xff) == 0xff) - { - CC_STATUS_INIT; +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_dup 0) + (const_int -65536))) + (clobber (reg:CC 17))] + "optimize_size" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (HImode, operands[0]);") - if ((INTVAL (operands[2]) & 0xff00) == 0) - { - operands[2] = const0_rtx; - return AS2 (mov%B0,%2,%h0); - } +(define_split + [(set (match_operand 0 "ext_register_operand" "") + (and (match_dup 0) + (const_int -256))) + (clobber (reg:CC 17))] + "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (QImode, operands[0]);") - operands[2] = GEN_INT ((INTVAL (operands[2]) >> 8) & 0xff); - return AS2 (and%B0,%2,%h0); - } +(define_split + [(set (match_operand 0 "ext_register_operand" "") + (and (match_dup 0) + (const_int -65281))) + (clobber (reg:CC 17))] + "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed" + [(parallel [(set (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (SImode, operands[0]);") + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*andsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*andsi_2" + [(set (reg 17) + (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" 
"rim,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (and:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*andsi_2_zext" + [(set (reg 17) + (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_expand "andhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (AND, HImode, operands); DONE;") + +(define_insn "*andhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") + (match_operand:HI 2 "general_operand" "ri,rm,L"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (AND, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + if (GET_CODE (operands[2]) != CONST_INT) + abort (); + if (INTVAL (operands[2]) == 0xff) + return "movz{bl|x}\t{%b1, %k0|%k0, %b1}"; + abort (); - /* use 32-bit ops on registers when there are no sign issues.. */ - if (REG_P (operands[0])) - { - if (!(INTVAL (operands[2]) & ~0x7fff)) - return AS2 (and%L0,%2,%k0); - } - } + default: + if (! 
rtx_equal_p (operands[0], operands[1])) + abort (); - if (REG_P (operands[0]) - && i386_aligned_p (operands[2])) - { - CC_STATUS_INIT; - /* If op[2] is constant, we should zero-extend it and */ - /* make a note that op[0] has been zero-extended, so */ - /* that we could use 32-bit ops on it forthwith, but */ - /* there is no such reg-note available. Instead we do */ - /* a sign extension as that can result in shorter asm */ - operands[2] = i386_sext16_if_const (operands[2]); - return AS2 (and%L0,%k2,%k0); + return "and{w}\t{%2, %0|%0, %2}"; } +} + [(set_attr "type" "alu,alu,imovx") + (set_attr "length_immediate" "*,*,0") + (set_attr "mode" "HI,HI,SI")]) + +(define_insn "*andhi_2" + [(set (reg 17) + (compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (and:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, HImode, operands)" + "and{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "andqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (AND, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? 
+(define_insn "*andqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qi,qmi,ri"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (AND, QImode, operands)" + "@ + and{b}\t{%2, %0|%0, %2} + and{b}\t{%2, %0|%0, %2} + and{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) - /* Use a 32-bit word with the upper bits set, invalidate CC */ - if (GET_CODE (operands[2]) == CONST_INT - && i386_aligned_p (operands[0])) +(define_insn "*andqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (and:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qi,qmi"))) + (clobber (reg:CC 17))] + "" + "and{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_2" + [(set (reg 17) + (compare (and:QI + (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qim,qi,i")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r") + (and:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, QImode, operands)" +{ + if (which_alternative == 2) { - HOST_WIDE_INT val = INTVAL (operands[2]); - CC_STATUS_INIT; - val |= ~0xffff; - if (val != INTVAL (operands[2])) - operands[2] = GEN_INT (val); - return AS2 (and%L0,%k2,%k0); + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) & 0xffffff00)) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff); + return "and{l}\t{%2, %k0|%k0, %2}"; } - - return AS2 (and%W0,%2,%0); -}" - [(set_attr "type" "binary")]) - -(define_insn "andqi3" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") - (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qn,qmn")))] - "" - "* return AS2 (and%B0,%2,%0);" - [(set_attr "type" "binary")]) - -/* I am nervous 
about these two.. add them later.. -;I presume this means that we have something in say op0= eax which is small -;and we want to and it with memory so we can do this by just an -;andb m,%al and have success. -(define_insn "" - [(set (match_operand:SI 0 "general_operand" "=r") - (and:SI (zero_extend:SI - (match_operand:HI 1 "nonimmediate_operand" "rm")) - (match_operand:SI 2 "general_operand" "0")))] - "GET_CODE (operands[2]) == CONST_INT - && (unsigned int) INTVAL (operands[2]) < (1 << GET_MODE_BITSIZE (HImode))" - "and%W0 %1,%0") - -(define_insn "" - [(set (match_operand:SI 0 "register_operand" "=q") + return "and{b}\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*andqi_2_slp" + [(set (reg 17) + (compare (and:QI + (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "nonimmediate_operand" "qmi,qi")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (and:QI (match_dup 0) (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode)" + "and{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +;; ??? A bug in recog prevents it from recognizing a const_int as an +;; operand to zero_extend in andqi_ext_1. It was checking explicitly +;; for a QImode operand, which of course failed. + +(define_insn "andqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC 17))] + "(unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +;; Generated by peephole translating test to and. This shows up +;; often in fp comparisons. 
+ +(define_insn "*andqi_ext_0_cc" + [(set (reg 17) + (compare + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_dup 1) + (const_int 8) + (const_int 8)) + (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && (unsigned HOST_WIDE_INT)INTVAL (operands[2]) <= 0xff" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) (and:SI - (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")) - (match_operand:SI 2 "register_operand" "0")))] - "GET_CODE (operands[2]) == CONST_INT - && (unsigned int) INTVAL (operands[2]) < (1 << GET_MODE_BITSIZE (QImode))" - "and%L0 %1,%0") 
- -*/ + (zero_extract:SI + (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC 17))] + "" + "and{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) -;;- Bit set (inclusive or) instructions +;; Logical inclusive OR instructions -;; This optimizes known byte-wide operations to memory, and in some cases -;; to QI registers.. Note that we don't want to use the QI registers too -;; aggressively, because often the 32-bit register instruction is the same -;; size, and likely to be faster on PentiumPro. -(define_insn "iorsi3" +;; %%% This used to optimize known byte-wide and operations to memory. +;; If this is considered useful, it should be done with splitters. + +(define_expand "iordi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (ior:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "ix86_expand_binary_operator (IOR, DImode, operands); DONE;") + +(define_insn "*iordi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rme"))) + (clobber (reg:CC 17))] + "TARGET_64BIT + && ix86_binary_operator_ok (IOR, DImode, operands)" + "or{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*iordi_2_rex64" + [(set (reg 17) + (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") + (ior:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, DImode, operands)" + "or{q}\t{%2, %0|%0, %2}" + 
[(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*iordi_3_rex64" + [(set (reg 17) + (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0") + (match_operand:DI 2 "x86_64_general_operand" "rem")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, DImode, operands)" + "or{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (ior:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "" + "ix86_expand_binary_operator (IOR, SImode, operands); DONE;") + +(define_insn "*iorsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "ri,rm")))] + (match_operand:SI 2 "general_operand" "ri,rmi"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*iorsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=rm") + (zero_extend:DI + (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_1_zext_imm" + [(set (match_operand:DI 0 "register_operand" "=rm") + (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) + (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_2" + [(set 
(reg 17) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (ior:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +;; ??? Special case for immediate operand is missing - it is tricky. +(define_insn "*iorsi_2_zext" + [(set (reg 17) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_2_zext_imm" + [(set (reg 17) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand 2 "x86_64_zext_immediate_operand" "Z")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_3" + [(set (reg 17) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "or{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_expand "iorhi3" + [(set (match_operand:HI 
0 "nonimmediate_operand" "") + (ior:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (IOR, HImode, operands); DONE;") + +(define_insn "*iorhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") + (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmi,ri"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (IOR, HImode, operands)" + "or{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*iorhi_2" + [(set (reg 17) + (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (ior:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, HImode, operands)" + "or{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*iorhi_3" + [(set (reg 17) + (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "or{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "iorqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ior:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (IOR, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? 
+(define_insn "*iorqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r") + (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmi,qi,ri"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (IOR, QImode, operands)" + "@ + or{b}\t{%2, %0|%0, %2} + or{b}\t{%2, %0|%0, %2} + or{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*iorqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m")) + (ior:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qmi,qi"))) + (clobber (reg:CC 17))] "" - "* -{ - HOST_WIDE_INT intval; - switch (GET_CODE (operands[2])) - { - case CONST_INT: + "or{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_2" + [(set (reg 17) + (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qim,qi")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (ior:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, QImode, operands)" + "or{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_2_slp" + [(set (reg 17) + (compare (ior:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "general_operand" "qim,qi")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (ior:QI (match_dup 0) (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode)" + "or{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_3" + [(set (reg 17) + (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qim")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "or{b}\t{%2, 
%0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) - if (REG_P (operands[0]) && ! QI_REG_P (operands[0])) - break; - - /* don't try to optimize volatile accesses */ - if (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])) - break; + +;; Logical XOR instructions - intval = INTVAL (operands[2]); - if ((intval & ~0xff) == 0) - { - if (REG_P (operands[0])) - { - /* Do low byte access only for %eax or when high bit is set */ - if (REGNO (operands[0]) != 0 && !(intval & 0x80)) - break; - } +;; %%% This used to optimize known byte-wide and operations to memory. +;; If this is considered useful, it should be done with splitters. -byte_or_operation: - CC_STATUS_INIT; +(define_expand "xordi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (xor:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "ix86_expand_binary_operator (XOR, DImode, operands); DONE;") + +(define_insn "*xordi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (clobber (reg:CC 17))] + "TARGET_64BIT + && ix86_binary_operator_ok (XOR, DImode, operands)" + "@ + xor{q}\t{%2, %0|%0, %2} + xor{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI,DI")]) + +(define_insn "*xordi_2_rex64" + [(set (reg 17) + (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") + (xor:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, DImode, operands)" + "@ + xor{q}\t{%2, %0|%0, %2} + xor{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI,DI")]) + +(define_insn "*xordi_3_rex64" + [(set (reg 17) + (compare 
(xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0") + (match_operand:DI 2 "x86_64_general_operand" "rem")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, DImode, operands)" + "xor{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_expand "xorsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (xor:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "" + "ix86_expand_binary_operator (XOR, SImode, operands); DONE;") - if (intval != INTVAL (operands[2])) - operands[2] = GEN_INT (intval); +(define_insn "*xorsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +;; Add speccase for immediates +(define_insn "*xorsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_1_zext_imm" + [(set (match_operand:DI 0 "register_operand" "=r") + (xor:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) + (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_2" + [(set (reg 17) 
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (xor:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +;; ??? Special case for immediate operand is missing - it is tricky. +(define_insn "*xorsi_2_zext" + [(set (reg 17) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_2_zext_imm" + [(set (reg 17) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand 2 "x86_64_zext_immediate_operand" "Z")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (xor:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_3" + [(set (reg 17) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xor{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_expand "xorhi3" + [(set (match_operand:HI 0 
"nonimmediate_operand" "") + (xor:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (XOR, HImode, operands); DONE;") + +(define_insn "*xorhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") + (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmi,ri"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (XOR, HImode, operands)" + "xor{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*xorhi_2" + [(set (reg 17) + (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rim,ri")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (xor:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, HImode, operands)" + "xor{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*xorhi_3" + [(set (reg 17) + (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xor{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "xorqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (XOR, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? 
+(define_insn "*xorqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r") + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmi,qi,ri"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (XOR, QImode, operands)" + "@ + xor{b}\t{%2, %0|%0, %2} + xor{b}\t{%2, %0|%0, %2} + xor{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*xorqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC 17))] + "" + "xor{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_1" + [(set (reg 17) + (compare + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qim,qi")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (xor:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, QImode, operands)" + "xor{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_2" + [(set (reg 17) + (compare + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qim")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xor{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_ext_1" + [(set (reg 17) + (compare + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 
"general_operand" "qmn")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) + (match_dup 2)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_ext_1_rex64" + [(set (reg 17) + (compare + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "nonmemory_operand" "Qn")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) + (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_expand "xorqi_cc_ext_1" + [(parallel [ + (set (reg:CCNO 17) + (compare:CCNO + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) + (match_dup 2)))])] + "" + "") + +;; Negation instructions - if (intval == 0xff) - return AS2 (mov%B0,%2,%b0); +(define_expand "negdi2" + [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "") + (neg:DI (match_operand:DI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "" + "ix86_expand_unary_operator (NEG, DImode, operands); DONE;") + +(define_insn "*negdi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=ro") + (neg:DI (match_operand:DI 1 "general_operand" "0"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT + && ix86_unary_operator_ok (NEG, DImode, operands)" + "#") - return AS2 
(or%B0,%2,%b0); - } +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (neg:DI (match_operand:DI 1 "general_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && reload_completed" + [(parallel + [(set (reg:CCZ 17) + (compare:CCZ (neg:SI (match_dup 2)) (const_int 0))) + (set (match_dup 0) (neg:SI (match_dup 2)))]) + (parallel + [(set (match_dup 1) + (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (match_dup 3)) + (const_int 0))) + (clobber (reg:CC 17))]) + (parallel + [(set (match_dup 1) + (neg:SI (match_dup 1))) + (clobber (reg:CC 17))])] + "split_di (operands+1, 1, operands+2, operands+3); + split_di (operands+0, 1, operands+0, operands+1);") + +(define_insn "*negdi2_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)" + "neg{q}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "DI")]) + +;; The problem with neg is that it does not perform (compare x 0), +;; it really performs (compare 0 x), which leaves us with the zero +;; flag being the only useful item. + +(define_insn "*negdi2_cmpz_rex64" + [(set (reg:CCZ 17) + (compare:CCZ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (neg:DI (match_dup 1)))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)" + "neg{q}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "DI")]) + + +(define_expand "negsi2" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "") + (neg:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "" + "ix86_expand_unary_operator (NEG, SImode, operands); DONE;") - /* second byte? 
*/ - if ((intval & ~0xff00) == 0) +(define_insn "*negsi2_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC 17))] + "ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; Combine is quite creative about this pattern. +(define_insn "*negsi2_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0") + (const_int 32))) + (const_int 32))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; The problem with neg is that it does not perform (compare x 0), +;; it really performs (compare 0 x), which leaves us with the zero +;; flag being the only useful item. + +(define_insn "*negsi2_cmpz" + [(set (reg:CCZ 17) + (compare:CCZ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (neg:SI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_insn "*negsi2_cmpz_zext" + [(set (reg:CCZ 17) + (compare:CCZ (lshiftrt:DI + (neg:DI (ashift:DI + (match_operand:DI 1 "register_operand" "0") + (const_int 32))) + (const_int 32)) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (neg:DI (ashift:DI (match_dup 1) + (const_int 32))) + (const_int 32)))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_expand "neghi2" + [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") + (neg:HI (match_operand:HI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_HIMODE_MATH" + "ix86_expand_unary_operator (NEG, HImode, 
operands); DONE;") + +(define_insn "*neghi2_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC 17))] + "ix86_unary_operator_ok (NEG, HImode, operands)" + "neg{w}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "HI")]) + +(define_insn "*neghi2_cmpz" + [(set (reg:CCZ 17) + (compare:CCZ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (neg:HI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, HImode, operands)" + "neg{w}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "HI")]) + +(define_expand "negqi2" + [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") + (neg:QI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_QIMODE_MATH" + "ix86_expand_unary_operator (NEG, QImode, operands); DONE;") + +(define_insn "*negqi2_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC 17))] + "ix86_unary_operator_ok (NEG, QImode, operands)" + "neg{b}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) + +(define_insn "*negqi2_cmpz" + [(set (reg:CCZ 17) + (compare:CCZ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (neg:QI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, QImode, operands)" + "neg{b}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) + +;; Changing of sign for FP values is doable using integer unit too. + +(define_expand "negsf2" + [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") + (neg:SF (match_operand:SF 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_80387" + "if (TARGET_SSE) + { + /* In case operand is in memory, we will not use SSE. 
*/ + if (memory_operand (operands[0], VOIDmode) + && rtx_equal_p (operands[0], operands[1])) + emit_insn (gen_negsf2_memory (operands[0], operands[1])); + else { - intval >>= 8; - - if (REG_P (operands[0])) - { - CC_STATUS_INIT; - operands[2] = GEN_INT (intval); - if (intval == 0xff) - return AS2 (mov%B0,%2,%h0); - - return AS2 (or%B0,%2,%h0); - } - - operands[0] = adj_offsettable_operand (operands[0], 1); - goto byte_or_operation; + /* Using SSE is tricky, since we need bitwise negation of -0 + in register. */ + rtx reg = gen_reg_rtx (SFmode); + rtx dest = operands[0]; + + operands[1] = force_reg (SFmode, operands[1]); + operands[0] = force_reg (SFmode, operands[0]); + emit_move_insn (reg, + gen_lowpart (SFmode, + GEN_INT (trunc_int_for_mode (0x80000000, + SImode)))); + emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg)); + if (dest != operands[0]) + emit_move_insn (dest, operands[0]); } + DONE; + } + ix86_expand_unary_operator (NEG, SFmode, operands); DONE;") - if (REG_P (operands[0])) - break; - - /* third byte? */ - if ((intval & ~0xff0000) == 0) - { - intval >>= 16; - operands[0] = adj_offsettable_operand (operands[0], 2); - goto byte_or_operation; - } +(define_insn "negsf2_memory" + [(set (match_operand:SF 0 "memory_operand" "=m") + (neg:SF (match_operand:SF 1 "memory_operand" "0"))) + (clobber (reg:CC 17))] + "ix86_unary_operator_ok (NEG, SFmode, operands)" + "#") - /* fourth byte? 
*/ - if ((intval & ~0xff000000) == 0) - { - intval = (intval >> 24) & 0xff; - operands[0] = adj_offsettable_operand (operands[0], 3); - goto byte_or_operation; - } +(define_insn "negsf2_ifs" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") + (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0"))) + (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x")) + (clobber (reg:CC 17))] + "TARGET_SSE + && (reload_in_progress || reload_completed + || (register_operand (operands[0], VOIDmode) + && register_operand (operands[1], VOIDmode)))" + "#") - default: - break; - } +(define_split + [(set (match_operand:SF 0 "memory_operand" "") + (neg:SF (match_operand:SF 1 "memory_operand" ""))) + (use (match_operand:SF 2 "" "")) + (clobber (reg:CC 17))] + "" + [(parallel [(set (match_dup 0) + (neg:SF (match_dup 1))) + (clobber (reg:CC 17))])]) - return AS2 (or%L0,%2,%0); -}" - [(set_attr "type" "binary")]) +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (match_operand:SF 1 "register_operand" ""))) + (use (match_operand:SF 2 "" "")) + (clobber (reg:CC 17))] + "reload_completed && !SSE_REG_P (operands[0])" + [(parallel [(set (match_dup 0) + (neg:SF (match_dup 1))) + (clobber (reg:CC 17))])]) -(define_insn "iorhi3" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") - (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "ri,rm")))] - "" - "* +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (match_operand:SF 1 "register_operand" ""))) + (use (match_operand:SF 2 "register_operand" "")) + (clobber (reg:CC 17))] + "reload_completed && SSE_REG_P (operands[0])" + [(set (subreg:TI (match_dup 0) 0) + (xor:TI (subreg:TI (match_dup 1) 0) + (subreg:TI (match_dup 2) 0)))] { - HOST_WIDE_INT intval; - switch (GET_CODE (operands[2])) + if (operands_match_p (operands[0], operands[2])) { - case CONST_INT: - - if (REG_P (operands[0]) && ! 
QI_REG_P (operands[0])) - break; - - /* don't try to optimize volatile accesses */ - if (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])) - break; - - intval = 0xffff & INTVAL (operands[2]); + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) + + +;; Keep 'f' and 'r' in separate alternatives to avoid reload problems +;; because of secondary memory needed to reload from class FLOAT_INT_REGS +;; to itself. +(define_insn "*negsf2_if" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f") + (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "TARGET_80387 && !TARGET_SSE + && ix86_unary_operator_ok (NEG, SFmode, operands)" + "#") - if ((intval & 0xff00) == 0) - { - if (REG_P (operands[0])) - { - /* Do low byte access only for %eax or when high bit is set */ - if (REGNO (operands[0]) != 0 && !(intval & 0x80)) - break; - } +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (match_operand:SF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + [(set (match_dup 0) + (neg:SF (match_dup 1)))] + "") -byte_or_operation: - CC_STATUS_INIT; +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (match_operand:SF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC 17))])] + "operands[1] = GEN_INT (trunc_int_for_mode (0x80000000, SImode)); + operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") - if (intval == 0xff) - return AS2 (mov%B0,%2,%b0); +(define_split + [(set (match_operand 0 "memory_operand" "") + (neg (match_operand 1 "memory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))" + [(parallel [(set (match_dup 0) (xor:QI (match_dup 
0) (match_dup 1))) + (clobber (reg:CC 17))])] +{ + int size = GET_MODE_SIZE (GET_MODE (operands[1])); - return AS2 (or%B0,%2,%b0); - } + /* XFmode's size is 12, TFmode 16, but only 10 bytes are used. */ + if (size >= 12) + size = 10; + operands[0] = adjust_address (operands[0], QImode, size - 1); + operands[1] = GEN_INT (trunc_int_for_mode (0x80, QImode)); +}) - /* high byte? */ - if ((intval & 0xff) == 0) +(define_expand "negdf2" + [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") + (neg:DF (match_operand:DF 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_80387" + "if (TARGET_SSE2) + { + /* In case operand is in memory, we will not use SSE. */ + if (memory_operand (operands[0], VOIDmode) + && rtx_equal_p (operands[0], operands[1])) + emit_insn (gen_negdf2_memory (operands[0], operands[1])); + else { - intval >>= 8; - operands[2] = GEN_INT (intval); - - if (REG_P (operands[0])) - { - CC_STATUS_INIT; - if (intval == 0xff) - return AS2 (mov%B0,%2,%h0); - - return AS2 (or%B0,%2,%h0); - } - - operands[0] = adj_offsettable_operand (operands[0], 1); - - goto byte_or_operation; + /* Using SSE is tricky, since we need bitwise negation of -0 + in register. 
*/ + rtx reg = gen_reg_rtx (DFmode); +#if HOST_BITS_PER_WIDE_INT >= 64 + rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63, + DImode)); +#else + rtx imm = immed_double_const (0, 0x80000000, DImode); +#endif + rtx dest = operands[0]; + + operands[1] = force_reg (DFmode, operands[1]); + operands[0] = force_reg (DFmode, operands[0]); + emit_move_insn (reg, gen_lowpart (DFmode, imm)); + emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg)); + if (dest != operands[0]) + emit_move_insn (dest, operands[0]); } + DONE; + } + ix86_expand_unary_operator (NEG, DFmode, operands); DONE;") - default: - break; - } - - if (REG_P (operands[0]) - && i386_aligned_p (operands[2])) - { - CC_STATUS_INIT; - operands[2] = i386_sext16_if_const (operands[2]); - return AS2 (or%L0,%k2,%k0); - } +(define_insn "negdf2_memory" + [(set (match_operand:DF 0 "memory_operand" "=m") + (neg:DF (match_operand:DF 1 "memory_operand" "0"))) + (clobber (reg:CC 17))] + "ix86_unary_operator_ok (NEG, DFmode, operands)" + "#") - if (GET_CODE (operands[2]) == CONST_INT - && i386_aligned_p (operands[0])) - { - CC_STATUS_INIT; - intval = 0xffff & INTVAL (operands[2]); - if (intval != INTVAL (operands[2])) - operands[2] = GEN_INT (intval); - return AS2 (or%L0,%2,%k0); - } +(define_insn "negdf2_ifs" + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf") + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) + (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_SSE2 + && (reload_in_progress || reload_completed + || (register_operand (operands[0], VOIDmode) + && register_operand (operands[1], VOIDmode)))" + "#") - return AS2 (or%W0,%2,%0); -}" - [(set_attr "type" "binary")]) +(define_insn "*negdf2_ifs_rex64" + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,fm#Yr,r#Yf") + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) + (use (match_operand:DF 2 "general_operand" 
"Y,0,*g#Yr,*rm")) + (clobber (reg:CC 17))] + "TARGET_64BIT && TARGET_SSE2 + && (reload_in_progress || reload_completed + || (register_operand (operands[0], VOIDmode) + && register_operand (operands[1], VOIDmode)))" + "#") -(define_insn "iorqi3" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") - (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qn,qmn")))] +(define_split + [(set (match_operand:DF 0 "memory_operand" "") + (neg:DF (match_operand:DF 1 "memory_operand" ""))) + (use (match_operand:DF 2 "" "")) + (clobber (reg:CC 17))] "" - "* return AS2 (or%B0,%2,%0);" - [(set_attr "type" "binary")]) - -;;- xor instructions + [(parallel [(set (match_dup 0) + (neg:DF (match_dup 1))) + (clobber (reg:CC 17))])]) -(define_insn "xorsi3" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") - (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "ri,rm")))] - "" - "* +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" ""))) + (use (match_operand:DF 2 "" "")) + (clobber (reg:CC 17))] + "reload_completed && !SSE_REG_P (operands[0]) + && (!TARGET_64BIT || FP_REG_P (operands[0]))" + [(parallel [(set (match_dup 0) + (neg:DF (match_dup 1))) + (clobber (reg:CC 17))])]) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" ""))) + (use (match_operand:DF 2 "" "")) + (clobber (reg:CC 17))] + "TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])" + [(parallel [(set (match_dup 0) + (xor:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); + operands[2] = gen_lowpart (DImode, operands[2]);") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" ""))) + (use (match_operand:DF 
2 "register_operand" "")) + (clobber (reg:CC 17))] + "reload_completed && SSE_REG_P (operands[0])" + [(set (subreg:TI (match_dup 0) 0) + (xor:TI (subreg:TI (match_dup 1) 0) + (subreg:TI (match_dup 2) 0)))] { - HOST_WIDE_INT intval; - switch (GET_CODE (operands[2])) + if (operands_match_p (operands[0], operands[2])) { - case CONST_INT: - - if (REG_P (operands[0]) && ! QI_REG_P (operands[0])) - break; + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) + +;; Keep 'f' and 'r' in separate alternatives to avoid reload problems +;; because of secondary memory needed to reload from class FLOAT_INT_REGS +;; to itself. +(define_insn "*negdf2_if" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f") + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_80387 + && ix86_unary_operator_ok (NEG, DFmode, operands)" + "#") - /* don't try to optimize volatile accesses */ - if (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0])) - break; +;; FIXME: We should to allow integer registers here. Problem is that +;; we need another scratch register to get constant from. +;; Forcing constant to mem if no register available in peep2 should be +;; safe even for PIC mode, because of RIP relative addressing. 
+(define_insn "*negdf2_if_rex64" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,mf") + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && TARGET_80387 + && ix86_unary_operator_ok (NEG, DFmode, operands)" + "#") - intval = INTVAL (operands[2]); - if ((intval & ~0xff) == 0) - { - if (REG_P (operands[0])) - { - /* Do low byte access only for %eax or when high bit is set */ - if (REGNO (operands[0]) != 0 && !(intval & 0x80)) - break; - } - -byte_xor_operation: - CC_STATUS_INIT; - - if (intval == 0xff - && (!TARGET_PENTIUM || optimize_size - || (GET_CODE (operands[0]) == MEM - && memory_address_info (XEXP (operands[0], 0), 1)))) - return AS1 (not%B0,%b0); - - if (intval != INTVAL (operands[2])) - operands[2] = GEN_INT (intval); - return AS2 (xor%B0,%2,%b0); - } +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + [(set (match_dup 0) + (neg:DF (match_dup 1)))] + "") - /* second byte? 
*/ - if ((intval & ~0xff00) == 0) - { - intval >>= 8; +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_80387 && reload_completed + && !FP_REGNO_P (REGNO (operands[0]))" + [(parallel [(set (match_dup 3) (xor:SI (match_dup 3) (match_dup 4))) + (clobber (reg:CC 17))])] + "operands[4] = GEN_INT (trunc_int_for_mode (0x80000000, SImode)); + split_di (operands+0, 1, operands+2, operands+3);") + +(define_expand "negxf2" + [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") + (neg:XF (match_operand:XF 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "!TARGET_64BIT && TARGET_80387" + "ix86_expand_unary_operator (NEG, XFmode, operands); DONE;") + +(define_expand "negtf2" + [(parallel [(set (match_operand:TF 0 "nonimmediate_operand" "") + (neg:TF (match_operand:TF 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_80387" + "ix86_expand_unary_operator (NEG, TFmode, operands); DONE;") + +;; Keep 'f' and 'r' in separate alternatives to avoid reload problems +;; because of secondary memory needed to reload from class FLOAT_INT_REGS +;; to itself. 
+(define_insn "*negxf2_if" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f") + (neg:XF (match_operand:XF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_80387 + && ix86_unary_operator_ok (NEG, XFmode, operands)" + "#") - if (REG_P (operands[0])) - { - CC_STATUS_INIT; - if (intval == 0xff - && (!TARGET_PENTIUM || optimize_size - || (GET_CODE (operands[0]) == MEM - && memory_address_info (XEXP (operands[0], 0), 1)))) - return AS1 (not%B0,%h0); +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (neg:XF (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + [(set (match_dup 0) + (neg:XF (match_dup 1)))] + "") - operands[2] = GEN_INT (intval); - return AS2 (xor%B0,%2,%h0); - } +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (neg:XF (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC 17))])] + "operands[1] = GEN_INT (0x8000); + operands[0] = gen_rtx_REG (SImode, + true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));") + +;; Keep 'f' and 'r' in separate alternatives to avoid reload problems +;; because of secondary memory needed to reload from class FLOAT_INT_REGS +;; to itself. 
+(define_insn "*negtf2_if" + [(set (match_operand:TF 0 "nonimmediate_operand" "=f#r,rm#f") + (neg:TF (match_operand:TF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "TARGET_80387 && ix86_unary_operator_ok (NEG, TFmode, operands)" + "#") - operands[0] = adj_offsettable_operand (operands[0], 1); +(define_split + [(set (match_operand:TF 0 "register_operand" "") + (neg:TF (match_operand:TF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + [(set (match_dup 0) + (neg:TF (match_dup 1)))] + "") - goto byte_xor_operation; - } +(define_split + [(set (match_operand:TF 0 "register_operand" "") + (neg:TF (match_operand:TF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC 17))])] + "operands[1] = GEN_INT (0x8000); + operands[0] = gen_rtx_REG (SImode, + true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));") + +;; Conditionize these after reload. If they matches before reload, we +;; lose the clobber and ability to use integer instructions. + +(define_insn "*negsf2_1" + [(set (match_operand:SF 0 "register_operand" "=f") + (neg:SF (match_operand:SF 1 "register_operand" "0")))] + "TARGET_80387 && reload_completed" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "SF") + (set_attr "ppro_uops" "few")]) - if (REG_P (operands[0])) - break; +(define_insn "*negdf2_1" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (match_operand:DF 1 "register_operand" "0")))] + "TARGET_80387 && reload_completed" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF") + (set_attr "ppro_uops" "few")]) - /* third byte? 
*/ - if ((intval & ~0xff0000) == 0) - { - intval >>= 16; - operands[0] = adj_offsettable_operand (operands[0], 2); - goto byte_xor_operation; - } +(define_insn "*negextendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (neg:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF") + (set_attr "ppro_uops" "few")]) - /* fourth byte? */ - if ((intval & ~0xff000000) == 0) - { - intval = (intval >> 24) & 0xff; - operands[0] = adj_offsettable_operand (operands[0], 3); - goto byte_xor_operation; - } +(define_insn "*negxf2_1" + [(set (match_operand:XF 0 "register_operand" "=f") + (neg:XF (match_operand:XF 1 "register_operand" "0")))] + "!TARGET_64BIT && TARGET_80387 && reload_completed" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF") + (set_attr "ppro_uops" "few")]) - default: - break; - } +(define_insn "*negextenddfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (neg:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] + "!TARGET_64BIT && TARGET_80387" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF") + (set_attr "ppro_uops" "few")]) - return AS2 (xor%L0,%2,%0); -}" - [(set_attr "type" "binary")]) +(define_insn "*negextendsfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (neg:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] + "!TARGET_64BIT && TARGET_80387" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF") + (set_attr "ppro_uops" "few")]) -(define_insn "xorhi3" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") - (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "ri,rm")))] - "" - "* -{ - if (GET_CODE (operands[2]) == CONST_INT - && ! (GET_CODE (operands[0]) == MEM && MEM_VOLATILE_P (operands[0]))) - { - /* Can we ignore the upper byte? */ - if ((! 
REG_P (operands[0]) || QI_REG_P (operands[0])) - && (INTVAL (operands[2]) & 0xff00) == 0) - { - CC_STATUS_INIT; - if (INTVAL (operands[2]) & 0xffff0000) - operands[2] = GEN_INT (INTVAL (operands[2]) & 0xffff); +(define_insn "*negtf2_1" + [(set (match_operand:TF 0 "register_operand" "=f") + (neg:TF (match_operand:TF 1 "register_operand" "0")))] + "TARGET_80387 && reload_completed" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF") + (set_attr "ppro_uops" "few")]) - if (INTVAL (operands[2]) == 0xff - && (!TARGET_PENTIUM || optimize_size - || (GET_CODE (operands[0]) == MEM - && memory_address_info (XEXP (operands[0], 0), 1)))) - return AS1 (not%B0,%b0); +(define_insn "*negextenddftf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (neg:TF (float_extend:TF + (match_operand:DF 1 "register_operand" "0"))))] + "TARGET_80387" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF") + (set_attr "ppro_uops" "few")]) - return AS2 (xor%B0,%2,%b0); - } +(define_insn "*negextendsftf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (neg:TF (float_extend:TF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387" + "fchs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF") + (set_attr "ppro_uops" "few")]) + +;; Absolute value instructions - /* Can we ignore the lower byte? */ - /* ??? what about offsettable memory references? */ - if (QI_REG_P (operands[0]) - && (INTVAL (operands[2]) & 0xff) == 0) +(define_expand "abssf2" + [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") + (neg:SF (match_operand:SF 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_80387" + "if (TARGET_SSE) + { + /* In case operand is in memory, we will not use SSE. 
*/ + if (memory_operand (operands[0], VOIDmode) + && rtx_equal_p (operands[0], operands[1])) + emit_insn (gen_abssf2_memory (operands[0], operands[1])); + else { - CC_STATUS_INIT; - operands[2] = GEN_INT ((INTVAL (operands[2]) >> 8) & 0xff); - - if (INTVAL (operands[2]) == 0xff - && (!TARGET_PENTIUM || optimize_size - || (GET_CODE (operands[0]) == MEM - && memory_address_info (XEXP (operands[0], 0), 1)))) - return AS1 (not%B0,%h0); - - return AS2 (xor%B0,%2,%h0); + /* Using SSE is tricky, since we need bitwise negation of -0 + in register. */ + rtx reg = gen_reg_rtx (SFmode); + rtx dest = operands[0]; + + operands[1] = force_reg (SFmode, operands[1]); + operands[0] = force_reg (SFmode, operands[0]); + emit_move_insn (reg, + gen_lowpart (SFmode, + GEN_INT (trunc_int_for_mode (0x80000000, + SImode)))); + emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg)); + if (dest != operands[0]) + emit_move_insn (dest, operands[0]); } - } - - if (REG_P (operands[0]) - && i386_aligned_p (operands[2])) - { - CC_STATUS_INIT; - operands[2] = i386_sext16_if_const (operands[2]); - return AS2 (xor%L0,%k2,%k0); - } + DONE; + } + ix86_expand_unary_operator (ABS, SFmode, operands); DONE;") - if (GET_CODE (operands[2]) == CONST_INT - && i386_aligned_p (operands[0])) - { - HOST_WIDE_INT intval; - CC_STATUS_INIT; - intval = 0xffff & INTVAL (operands[2]); - if (intval != INTVAL (operands[2])) - operands[2] = GEN_INT (intval); - return AS2 (xor%L0,%2,%k0); - } +(define_insn "abssf2_memory" + [(set (match_operand:SF 0 "memory_operand" "=m") + (abs:SF (match_operand:SF 1 "memory_operand" "0"))) + (clobber (reg:CC 17))] + "ix86_unary_operator_ok (ABS, SFmode, operands)" + "#") - return AS2 (xor%W0,%2,%0); -}" - [(set_attr "type" "binary")]) +(define_insn "abssf2_ifs" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf") + (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0"))) + (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x")) + (clobber (reg:CC 17))] + 
"TARGET_SSE + && (reload_in_progress || reload_completed + || (register_operand (operands[0], VOIDmode) + && register_operand (operands[1], VOIDmode)))" + "#") -(define_insn "xorqi3" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") - (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qn,qm")))] +(define_split + [(set (match_operand:SF 0 "memory_operand" "") + (abs:SF (match_operand:SF 1 "memory_operand" ""))) + (use (match_operand:SF 2 "" "")) + (clobber (reg:CC 17))] "" - "* return AS2 (xor%B0,%2,%0);" - [(set_attr "type" "binary")]) - -;; logical operations for DImode + [(parallel [(set (match_dup 0) + (abs:SF (match_dup 1))) + (clobber (reg:CC 17))])]) -(define_insn "anddi3" - [(set (match_operand:DI 0 "general_operand" "=&r,&ro") - (and:DI (match_operand:DI 1 "general_operand" "%0,0") - (match_operand:DI 2 "general_operand" "oriF,riF")))] - "" - "#" - [(set_attr "type" "binary")]) +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (abs:SF (match_operand:SF 1 "register_operand" ""))) + (use (match_operand:SF 2 "" "")) + (clobber (reg:CC 17))] + "reload_completed && !SSE_REG_P (operands[0])" + [(parallel [(set (match_dup 0) + (abs:SF (match_dup 1))) + (clobber (reg:CC 17))])]) +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (abs:SF (match_operand:SF 1 "register_operand" ""))) + (use (match_operand:SF 2 "register_operand" "")) + (clobber (reg:CC 17))] + "reload_completed && SSE_REG_P (operands[0])" + [(set (subreg:TI (match_dup 0) 0) + (and:TI (not:TI (subreg:TI (match_dup 2) 0)) + (subreg:TI (match_dup 1) 0)))]) + +;; Keep 'f' and 'r' in separate alternatives to avoid reload problems +;; because of secondary memory needed to reload from class FLOAT_INT_REGS +;; to itself. 
+(define_insn "*abssf2_if" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f") + (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "TARGET_80387 && ix86_unary_operator_ok (ABS, SFmode, operands) && !TARGET_SSE" + "#") -(define_insn "iordi3" - [(set (match_operand:DI 0 "general_operand" "=&r,&ro") - (ior:DI (match_operand:DI 1 "general_operand" "%0,0") - (match_operand:DI 2 "general_operand" "oriF,riF")))] - "" - "#" - [(set_attr "type" "binary")]) +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (abs:SF (match_operand:SF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && FP_REGNO_P (REGNO (operands[0]))" + [(set (match_dup 0) + (abs:SF (match_dup 1)))] + "") -(define_insn "xordi3" - [(set (match_operand:DI 0 "general_operand" "=&r,&ro") - (xor:DI (match_operand:DI 1 "general_operand" "%0,0") - (match_operand:DI 2 "general_operand" "oriF,riF")))] - "" - "#" - [(set_attr "type" "binary")]) +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (abs:SF (match_operand:SF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC 17))])] + "operands[1] = GEN_INT (trunc_int_for_mode (~0x80000000, SImode)); + operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") (define_split - [(set (match_operand:DI 0 "general_operand" "") - (match_operator:DI 3 "ix86_logical_operator" - [(match_operand:DI 1 "general_operand" "") - (match_operand:DI 2 "general_operand" "")]))] - "" - [(set (match_dup 4) (match_op_dup:SI 3 [(match_dup 6) (match_dup 8)])) - (set (match_dup 5) (match_op_dup:SI 3 [(match_dup 7) (match_dup 9)]))] - "split_di (&operands[0], 1, &operands[4], &operands[5]); - split_di (&operands[1], 1, &operands[6], &operands[7]); - split_di (&operands[2], 1, &operands[8], &operands[9]);") + [(set (match_operand 0 
"memory_operand" "") + (abs (match_operand 1 "memory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && reload_completed && FLOAT_MODE_P (GET_MODE (operands[0]))" + [(parallel [(set (match_dup 0) (and:QI (match_dup 0) (match_dup 1))) + (clobber (reg:CC 17))])] +{ + int size = GET_MODE_SIZE (GET_MODE (operands[1])); -;;- negation instructions + /* XFmode's size is 12, TFmode 16, but only 10 bytes are used. */ + if (size >= 12) + size = 10; + operands[0] = adjust_address (operands[0], QImode, size - 1); + operands[1] = GEN_INT (trunc_int_for_mode (~0x80, QImode)); +}) -(define_insn "negdi2" - [(set (match_operand:DI 0 "general_operand" "=&ro") - (neg:DI (match_operand:DI 1 "general_operand" "0")))] - "" - "* -{ - rtx xops[2], low[1], high[1]; +(define_expand "absdf2" + [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") + (neg:DF (match_operand:DF 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_80387" + "if (TARGET_SSE2) + { + /* In case operand is in memory, we will not use SSE. */ + if (memory_operand (operands[0], VOIDmode) + && rtx_equal_p (operands[0], operands[1])) + emit_insn (gen_absdf2_memory (operands[0], operands[1])); + else + { + /* Using SSE is tricky, since we need bitwise negation of -0 + in register. 
*/ + rtx reg = gen_reg_rtx (DFmode); +#if HOST_BITS_PER_WIDE_INT >= 64 + rtx imm = GEN_INT (trunc_int_for_mode(((HOST_WIDE_INT)1) << 63, + DImode)); +#else + rtx imm = immed_double_const (0, 0x80000000, DImode); +#endif + rtx dest = operands[0]; + + operands[1] = force_reg (DFmode, operands[1]); + operands[0] = force_reg (DFmode, operands[0]); + emit_move_insn (reg, gen_lowpart (DFmode, imm)); + emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg)); + if (dest != operands[0]) + emit_move_insn (dest, operands[0]); + } + DONE; + } + ix86_expand_unary_operator (ABS, DFmode, operands); DONE;") - CC_STATUS_INIT; +(define_insn "absdf2_memory" + [(set (match_operand:DF 0 "memory_operand" "=m") + (abs:DF (match_operand:DF 1 "memory_operand" "0"))) + (clobber (reg:CC 17))] + "ix86_unary_operator_ok (ABS, DFmode, operands)" + "#") - split_di (operands, 1, low, high); - xops[0] = const0_rtx; - xops[1] = high[0]; +(define_insn "absdf2_ifs" + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0"))) + (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_SSE2 + && (reload_in_progress || reload_completed + || (register_operand (operands[0], VOIDmode) + && register_operand (operands[1], VOIDmode)))" + "#") - output_asm_insn (AS1 (neg%L0,%0), low); - output_asm_insn (AS2 (adc%L1,%0,%1), xops); - output_asm_insn (AS1 (neg%L0,%0), high); - RET; -}") +(define_insn "*absdf2_ifs_rex64" + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0"))) + (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y")) + (clobber (reg:CC 17))] + "TARGET_64BIT && TARGET_SSE2 + && (reload_in_progress || reload_completed + || (register_operand (operands[0], VOIDmode) + && register_operand (operands[1], VOIDmode)))" + "#") -(define_insn "negsi2" - [(set (match_operand:SI 0 "nonimmediate_operand" 
"=rm") - (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0")))] +(define_split + [(set (match_operand:DF 0 "memory_operand" "") + (abs:DF (match_operand:DF 1 "memory_operand" ""))) + (use (match_operand:DF 2 "" "")) + (clobber (reg:CC 17))] "" - "neg%L0 %0") + [(parallel [(set (match_dup 0) + (abs:DF (match_dup 1))) + (clobber (reg:CC 17))])]) -(define_insn "neghi2" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") - (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0")))] - "" - "* - if (REG_P (operands[0]) && i386_cc_probably_useless_p (insn)) - { - CC_STATUS_INIT; - return AS1(neg%L0,%k0); - } - return AS1(neg%W0,%0);") +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (abs:DF (match_operand:DF 1 "register_operand" ""))) + (use (match_operand:DF 2 "" "")) + (clobber (reg:CC 17))] + "reload_completed && !SSE_REG_P (operands[0])" + [(parallel [(set (match_dup 0) + (abs:DF (match_dup 1))) + (clobber (reg:CC 17))])]) -(define_insn "negqi2" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") - (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")))] - "" - "neg%B0 %0") +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (abs:DF (match_operand:DF 1 "register_operand" ""))) + (use (match_operand:DF 2 "register_operand" "")) + (clobber (reg:CC 17))] + "reload_completed && SSE_REG_P (operands[0])" + [(set (subreg:TI (match_dup 0) 0) + (and:TI (not:TI (subreg:TI (match_dup 2) 0)) + (subreg:TI (match_dup 1) 0)))]) + + +;; Keep 'f' and 'r' in separate alternatives to avoid reload problems +;; because of secondary memory needed to reload from class FLOAT_INT_REGS +;; to itself. 
+(define_insn "*absdf2_if" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_80387 + && ix86_unary_operator_ok (ABS, DFmode, operands)" + "#") -(define_insn "negsf2" - [(set (match_operand:SF 0 "register_operand" "=f") - (neg:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387" - "fchs" - [(set_attr "type" "fpop")]) +;; FIXME: We should to allow integer registers here. Problem is that +;; we need another scratch register to get constant from. +;; Forcing constant to mem if no register available in peep2 should be +;; safe even for PIC mode, because of RIP relative addressing. +(define_insn "*absdf2_if_rex64" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,mf") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && TARGET_80387 + && ix86_unary_operator_ok (ABS, DFmode, operands)" + "#") -(define_insn "negdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (neg:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387" - "fchs" - [(set_attr "type" "fpop")]) +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (abs:DF (match_operand:DF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + [(set (match_dup 0) + (abs:DF (match_dup 1)))] + "") -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f") - (neg:DF (float_extend:DF (match_operand:SF 1 "register_operand" "0"))))] +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (abs:DF (match_operand:DF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_80387 && reload_completed && + !FP_REGNO_P (REGNO (operands[0]))" + [(parallel [(set (match_dup 3) (and:SI (match_dup 3) (match_dup 4))) + (clobber (reg:CC 17))])] + "operands[4] = GEN_INT (trunc_int_for_mode 
(~0x80000000, SImode)); + split_di (operands+0, 1, operands+2, operands+3);") + +(define_expand "absxf2" + [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") + (neg:XF (match_operand:XF 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] + "!TARGET_64BIT && TARGET_80387" + "ix86_expand_unary_operator (ABS, XFmode, operands); DONE;") + +(define_expand "abstf2" + [(parallel [(set (match_operand:TF 0 "nonimmediate_operand" "") + (neg:TF (match_operand:TF 1 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))])] "TARGET_80387" - "fchs" - [(set_attr "type" "fpop")]) + "ix86_expand_unary_operator (ABS, TFmode, operands); DONE;") + +;; Keep 'f' and 'r' in separate alternatives to avoid reload problems +;; because of secondary memory needed to reload from class FLOAT_INT_REGS +;; to itself. +(define_insn "*absxf2_if" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f") + (abs:XF (match_operand:XF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_80387 + && ix86_unary_operator_ok (ABS, XFmode, operands)" + "#") -(define_insn "negxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (neg:XF (match_operand:XF 1 "register_operand" "0")))] - "TARGET_80387" - "fchs" - [(set_attr "type" "fpop")]) +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (abs:XF (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + [(set (match_dup 0) + (abs:XF (match_dup 1)))] + "") -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f") - (neg:XF (float_extend:XF (match_operand:DF 1 "register_operand" "0"))))] - "TARGET_80387" - "fchs" - [(set_attr "type" "fpop")]) - -;; Absolute value instructions +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (abs:XF (match_operand:XF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO 
(operands[0]))" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC 17))])] + "operands[1] = GEN_INT (~0x8000); + operands[0] = gen_rtx_REG (SImode, + true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));") + +(define_insn "*abstf2_if" + [(set (match_operand:TF 0 "nonimmediate_operand" "=f#r,rm#f") + (abs:TF (match_operand:TF 1 "nonimmediate_operand" "0,0"))) + (clobber (reg:CC 17))] + "TARGET_80387 && ix86_unary_operator_ok (ABS, TFmode, operands)" + "#") -(define_insn "abssf2" +(define_split + [(set (match_operand:TF 0 "register_operand" "") + (abs:TF (match_operand:TF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && FP_REGNO_P (REGNO (operands[0])) && reload_completed" + [(set (match_dup 0) + (abs:TF (match_dup 1)))] + "") + +(define_split + [(set (match_operand:TF 0 "register_operand" "") + (abs:TF (match_operand:TF 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_80387 && reload_completed && !FP_REGNO_P (REGNO (operands[0]))" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC 17))])] + "operands[1] = GEN_INT (~0x8000); + operands[0] = gen_rtx_REG (SImode, + true_regnum (operands[0]) + (TARGET_64BIT ? 
1 : 2));") + +(define_insn "*abssf2_1" [(set (match_operand:SF 0 "register_operand" "=f") (abs:SF (match_operand:SF 1 "register_operand" "0")))] - "TARGET_80387" + "TARGET_80387 && reload_completed" "fabs" - [(set_attr "type" "fpop")]) + [(set_attr "type" "fsgn") + (set_attr "mode" "SF")]) -(define_insn "absdf2" +(define_insn "*absdf2_1" [(set (match_operand:DF 0 "register_operand" "=f") (abs:DF (match_operand:DF 1 "register_operand" "0")))] - "TARGET_80387" + "TARGET_80387 && reload_completed" "fabs" - [(set_attr "type" "fpop")]) + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) -(define_insn "" +(define_insn "*absextendsfdf2" [(set (match_operand:DF 0 "register_operand" "=f") - (abs:DF (float_extend:DF (match_operand:SF 1 "register_operand" "0"))))] + (abs:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] "TARGET_80387" "fabs" - [(set_attr "type" "fpop")]) + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) -(define_insn "absxf2" +(define_insn "*absxf2_1" [(set (match_operand:XF 0 "register_operand" "=f") (abs:XF (match_operand:XF 1 "register_operand" "0")))] - "TARGET_80387" + "!TARGET_64BIT && TARGET_80387 && reload_completed" "fabs" - [(set_attr "type" "fpop")]) + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) -(define_insn "" +(define_insn "*absextenddfxf2" [(set (match_operand:XF 0 "register_operand" "=f") - (abs:XF (float_extend:XF (match_operand:DF 1 "register_operand" "0"))))] - "TARGET_80387" + (abs:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] + "!TARGET_64BIT && TARGET_80387" "fabs" - [(set_attr "type" "fpop")]) + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) -(define_insn "sqrtsf2" - [(set (match_operand:SF 0 "register_operand" "=f") - (sqrt:SF (match_operand:SF 1 "register_operand" "0")))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387" - "fsqrt") - -(define_insn "sqrtdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (sqrt:DF (match_operand:DF 1 "register_operand" "0")))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && (TARGET_IEEE_FP || flag_fast_math) " - "fsqrt") - -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f") - (sqrt:DF (float_extend:DF - (match_operand:SF 1 "register_operand" "0"))))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387" - "fsqrt") - -(define_insn "sqrtxf2" +(define_insn "*absextendsfxf2" [(set (match_operand:XF 0 "register_operand" "=f") - (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 - && (TARGET_IEEE_FP || flag_fast_math) " - "fsqrt") - -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f") - (sqrt:XF (float_extend:XF - (match_operand:DF 1 "register_operand" "0"))))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387" - "fsqrt") + (abs:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] + "!TARGET_64BIT && TARGET_80387" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f") - (sqrt:XF (float_extend:XF - (match_operand:SF 1 "register_operand" "0"))))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387" - "fsqrt") +(define_insn "*abstf2_1" + [(set (match_operand:TF 0 "register_operand" "=f") + (abs:TF (match_operand:TF 1 "register_operand" "0")))] + "TARGET_80387 && reload_completed" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) -(define_insn "sindf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 1))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_fast_math" - "fsin") +(define_insn "*absextenddftf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (abs:TF (float_extend:TF + (match_operand:DF 1 "register_operand" "0"))))] + "TARGET_80387" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) -(define_insn "sinsf2" - [(set (match_operand:SF 0 "register_operand" "=f") - (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 1))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_fast_math" - "fsin") +(define_insn "*absextendsftf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (abs:TF (float_extend:TF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387" + "fabs" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +;; One complement instructions -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(float_extend:DF - (match_operand:SF 1 "register_operand" "0"))] 1))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_fast_math" - "fsin") +(define_expand "one_cmpldi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (not:DI (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_64BIT" + "ix86_expand_unary_operator (NOT, DImode, operands); DONE;") + +(define_insn "*one_cmpldi2_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))] + "TARGET_64BIT && ix86_unary_operator_ok (NOT, DImode, operands)" + "not{q}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "DI")]) + +(define_insn "*one_cmpldi2_2_rex64" + [(set (reg 17) + (compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (not:DI (match_dup 1)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, DImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "DI")]) -(define_insn "sinxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 1))] - "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_fast_math" - "fsin") +(define_split + [(set (reg 17) + (compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "") + (not:DI (match_dup 1)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (reg:CCNO 17) + (compare:CCNO (xor:DI (match_dup 1) (const_int -1)) + (const_int 0))) + (set (match_dup 0) + (xor:DI (match_dup 1) (const_int -1)))])] + "") -(define_insn "cosdf2" - [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 2))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_fast_math" - "fcos") +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + "" + "ix86_expand_unary_operator (NOT, SImode, operands); DONE;") -(define_insn "cossf2" - [(set (match_operand:SF 0 "register_operand" "=f") - (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 2))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_fast_math" - "fcos") +(define_insn "*one_cmplsi2_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")))] + "ix86_unary_operator_ok (NOT, SImode, operands)" + "not{l}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; ??? Currently never generated - xor is used instead. 
+(define_insn "*one_cmplsi2_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (not:SI (match_operand:SI 1 "register_operand" "0"))))] + "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)" + "not{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_insn "*one_cmplsi2_2" + [(set (reg 17) + (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (not:SI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, SImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(float_extend:DF - (match_operand:SF 1 "register_operand" "0"))] 2))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_fast_math" - "fcos") +(define_split + [(set (reg 17) + (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "") + (not:SI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (reg:CCNO 17) + (compare:CCNO (xor:SI (match_dup 1) (const_int -1)) + (const_int 0))) + (set (match_dup 0) + (xor:SI (match_dup 1) (const_int -1)))])] + "") -(define_insn "cosxf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 2))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_fast_math" - "fcos") - -;;- one complement instructions +;; ??? Currently never generated - xor is used instead. 
+(define_insn "*one_cmplsi2_2_zext" + [(set (reg 17) + (compare (not:SI (match_operand:SI 1 "register_operand" "0")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (not:SI (match_dup 1))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, SImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) -(define_insn "one_cmplsi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") - (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")))] - "" - "* -{ - /* A Pentium NOT is not pariable. Output it only in case of complex - memory address, because XOR will be inpariable anyway because - of immediate/displacement rule. */ +(define_split + [(set (reg 17) + (compare (not:SI (match_operand:SI 1 "register_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (not:SI (match_dup 1))))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (reg:CCNO 17) + (compare:CCNO (xor:SI (match_dup 1) (const_int -1)) + (const_int 0))) + (set (match_dup 0) + (zero_extend:DI (xor:SI (match_dup 1) (const_int -1))))])] + "") - if (TARGET_PENTIUM && !optimize_size - && (GET_CODE (operands[0]) != MEM - || memory_address_info (XEXP (operands[0], 0), 1) == 0)) - { - rtx xops[2]; - xops[0] = operands[0]; - xops[1] = GEN_INT (0xffffffff); - output_asm_insn (AS2 (xor%L0,%1,%0), xops); - RET; - } - else - return AS1 (not%L0,%0); -}") +(define_expand "one_cmplhi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_unary_operator (NOT, HImode, operands); DONE;") -(define_insn "one_cmplhi2" +(define_insn "*one_cmplhi2_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (not:HI (match_operand:HI 1 "nonimmediate_operand" "0")))] - "" - "* -{ - /* A Pentium NOT is not pariable. 
Output it only in case of complex - memory address, because XOR will be inpariable anyway because - of immediate/displacement rule. */ - - if (TARGET_PENTIUM && !optimize_size - && (GET_CODE (operands[0]) != MEM - || memory_address_info (XEXP (operands[0], 0), 1) == 0)) - { - rtx xops[2]; - xops[0] = operands[0]; - xops[1] = GEN_INT (0xffff); - if (REG_P (operands[0]) - && i386_cc_probably_useless_p (insn)) - { - CC_STATUS_INIT; - output_asm_insn (AS2 (xor%L0,%1,%k0), xops); - } - else - output_asm_insn (AS2 (xor%W0,%1,%0), xops); - RET; - } - else - { - if (REG_P (operands[0]) - && i386_cc_probably_useless_p (insn)) - { - CC_STATUS_INIT; - return AS1 (not%L0,%k0); - } - return AS1 (not%W0,%0); - } -}") + "ix86_unary_operator_ok (NOT, HImode, operands)" + "not{w}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "HI")]) + +(define_insn "*one_cmplhi2_2" + [(set (reg 17) + (compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (not:HI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NEG, HImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "HI")]) -(define_insn "one_cmplqi2" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") - (not:QI (match_operand:QI 1 "nonimmediate_operand" "0")))] - "" - "* -{ - /* A Pentium NOT is not pariable. Output it only in case of complex - memory address, because XOR will be inpariable anyway because - of immediate/displacement rule. 
*/ +(define_split + [(set (reg 17) + (compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "") + (not:HI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (reg:CCNO 17) + (compare:CCNO (xor:HI (match_dup 1) (const_int -1)) + (const_int 0))) + (set (match_dup 0) + (xor:HI (match_dup 1) (const_int -1)))])] + "") - if (TARGET_PENTIUM && !optimize_size - && (GET_CODE (operands[0]) != MEM - || memory_address_info (XEXP (operands[0], 0), 1) == 0)) - { - rtx xops[2]; - xops[0] = operands[0]; - xops[1] = GEN_INT (0xff); - output_asm_insn (AS2 (xor%B0,%1,%0), xops); - RET; - } - else - return AS1 (not%B0,%0); -}") +;; %%% Potential partial reg stall on alternative 1. What to do? +(define_expand "one_cmplqi2" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_unary_operator (NOT, QImode, operands); DONE;") + +(define_insn "*one_cmplqi2_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))] + "ix86_unary_operator_ok (NOT, QImode, operands)" + "@ + not{b}\t%0 + not{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI,SI")]) + +(define_insn "*one_cmplqi2_2" + [(set (reg 17) + (compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (not:QI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, QImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_split + [(set (reg 17) + (compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "") + (not:QI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (reg:CCNO 17) + (compare:CCNO (xor:QI (match_dup 1) 
(const_int -1)) + (const_int 0))) + (set (match_dup 0) + (xor:QI (match_dup 1) (const_int -1)))])] + "") -;;- arithmetic shift instructions +;; Arithmetic shift instructions ;; DImode shifts are implemented using the i386 "shift double" opcode, ;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count ;; is variable, then the count is in %cl and the "imm" operand is dropped ;; from the assembler input. - +;; ;; This instruction shifts the target reg/mem as usual, but instead of ;; shifting in zeros, bits are shifted in from reg operand. If the insn ;; is a left shift double, bits are taken from the high order bits of ;; reg, else if the insn is a shift right double, bits are taken from the ;; low order bits of reg. So if %eax is "1234" and %edx is "5678", ;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345". - +;; ;; Since sh[lr]d does not change the `reg' operand, that is done ;; separately, making all shifts emit pairs of shift double and normal ;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to ;; support a 63 bit shift, each shift where the count is in a reg expands ;; to a pair of shifts, a branch, a shift by 32 and a label. - +;; ;; If the shift count is a constant, we need never emit more than one ;; shift pair, instead using moves and sign extension for counts greater ;; than 31. (define_expand "ashldi3" - [(set (match_operand:DI 0 "register_operand" "") - (ashift:DI (match_operand:DI 1 "register_operand" "") - (match_operand:QI 2 "nonmemory_operand" "")))] + [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "") + (ashift:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))])] "" - " { - if (GET_CODE (operands[2]) != CONST_INT - || ! CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')) + if (!TARGET_64BIT && TARGET_CMOVE && ! 
immediate_operand (operands[2], QImode)) { - operands[2] = copy_to_mode_reg (QImode, operands[2]); - emit_insn (gen_ashldi3_non_const_int (operands[0], operands[1], - operands[2])); + emit_insn (gen_ashldi3_1 (operands[0], operands[1], operands[2])); + DONE; } - else - emit_insn (gen_ashldi3_const_int (operands[0], operands[1], operands[2])); - + ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE; -}") - -(define_insn "ashldi3_const_int" - [(set (match_operand:DI 0 "register_operand" "=&r") - (ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_operand" "J")))] - "CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')" - "* +}) + +(define_insn "*ashldi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0,r") + (match_operand:QI 2 "nonmemory_operand" "cJ,M"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, DImode, operands)" { - rtx xops[4], low[1], high[1]; - - CC_STATUS_INIT; - - split_di (operands, 1, low, high); - xops[0] = operands[2]; - xops[1] = const1_rtx; - xops[2] = low[0]; - xops[3] = high[0]; - - if (INTVAL (xops[0]) > 31) + switch (get_attr_type (insn)) { - output_asm_insn (AS2 (mov%L3,%2,%3), xops); /* Fast shift by 32 */ - output_asm_insn (AS2 (xor%L2,%2,%2), xops); + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + if (!rtx_equal_p (operands[0], operands[1])) + abort (); + return "add{q}\t{%0, %0|%0, %0}"; - if (INTVAL (xops[0]) > 32) - { - xops[0] = GEN_INT (INTVAL (xops[0]) - 32); - output_asm_insn (AS2 (sal%L3,%0,%3), xops); /* Remaining shift */ - } + case TYPE_LEA: + if (GET_CODE (operands[2]) != CONST_INT + || (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 3) + abort (); + operands[1] = gen_rtx_MULT (DImode, operands[1], + GEN_INT (1 << INTVAL (operands[2]))); + return "lea{q}\t{%a1, %0|%0, %a1}"; + + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if 
(GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; } - else +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (mult:DI (match_dup 1) + (match_dup 2)))] + "operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), + DImode));") + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashldi3_cmp_rex64" + [(set (reg 17) + (compare + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "e")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashift:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands)" +{ + switch (get_attr_type (insn)) { - output_asm_insn (AS3 (shld%L3,%0,%2,%3), xops); - output_asm_insn (AS2 (sal%L2,%0,%2), xops); - } - RET; -}") + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + return "add{q}\t{%0, %0|%0, %0}"; -(define_insn "ashldi3_non_const_int" - [(set (match_operand:DI 0 "register_operand" "=&r") + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +(define_insn "ashldi3_1" + [(set (match_operand:DI 0 "register_operand" "=r") (ashift:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "register_operand" "c")))] - "" - "* -{ - rtx xops[5], low[1], high[1]; - - CC_STATUS_INIT; + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_CMOVE" + "#" + [(set_attr "type" "multi")]) - split_di (operands, 1, low, high); - xops[0] = operands[2]; - xops[1] = GEN_INT (32); - xops[2] = low[0]; - xops[3] = high[0]; - xops[4] = gen_label_rtx (); +(define_insn "*ashldi3_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 
"register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) - output_asm_insn (AS3_SHIFT_DOUBLE (shld%L3,%0,%2,%3), xops); - output_asm_insn (AS2 (sal%L2,%0,%2), xops); - output_asm_insn (AS2 (test%B0,%1,%b0), xops); - output_asm_insn (AS1 (je,%X4), xops); - output_asm_insn (AS2 (mov%L3,%2,%3), xops); /* Fast shift by 32 */ - output_asm_insn (AS2 (xor%L2,%2,%2), xops); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", - CODE_LABEL_NUMBER (xops[4])); - RET; -}") +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_CMOVE && reload_completed" + [(const_int 0)] + "ix86_split_ashldi (operands, operands[3]); DONE;") -(define_expand "ashlsi3" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "nonmemory_operand" "")))] +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashldi (operands, NULL_RTX); DONE;") + +(define_insn "x86_shld_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m") + (ior:SI (ashift:SI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "I,c")) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC 17))] "" + "@ + shld{l}\t{%2, %1, %0|%0, %1, %2} + shld{l}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "ppro_uops" "few")]) + +(define_expand 
"x86_shift_adj_1" + [(set (reg:CCZ 17) + (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") + (const_int 32)) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne (reg:CCZ 17) (const_int 0)) + (match_operand:SI 1 "register_operand" "") + (match_dup 0))) + (set (match_dup 1) + (if_then_else:SI (ne (reg:CCZ 17) (const_int 0)) + (match_operand:SI 3 "register_operand" "r") + (match_dup 1)))] + "TARGET_CMOVE" "") -(define_expand "ashlhi3" - [(set (match_operand:HI 0 "nonimmediate_operand" "") - (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "nonmemory_operand" "")))] +(define_expand "x86_shift_adj_2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] "" - "") +{ + rtx label = gen_label_rtx (); + rtx tmp; -(define_expand "ashlqi3" - [(set (match_operand:QI 0 "nonimmediate_operand" "") - (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "") - (match_operand:QI 2 "nonmemory_operand" "")))] - "" - "") + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32))); -;; Pattern for shifts which can be encoded into an lea instruction. -;; This is kept as a separate pattern so that regmove can optimize cases -;; where we know the source and destination must match. -;; -;; Do not expose this pattern when optimizing for size since we never want -;; to use lea when optimizing for size since mov+sal is smaller than lea. + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; -(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r") - (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,r") - (match_operand:SI 2 "small_shift_operand" "M,M")))] - "! 
optimize_size" - "* return output_ashl (insn, operands);") + emit_move_insn (operands[0], operands[1]); + emit_move_insn (operands[1], const0_rtx); -;; Generic left shift pattern to catch all cases not handled by the -;; shift pattern above. -(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") - (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "cI")))] - "" - "* return output_ashl (insn, operands);") + emit_label (label); + LABEL_NUSES (label) = 1; -(define_insn "" - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r") - (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,r") - (match_operand:HI 2 "small_shift_operand" "M,M")))] - "! optimize_size" - "* return output_ashl (insn, operands);") + DONE; +}) -(define_insn "" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") - (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:HI 2 "nonmemory_operand" "cI")))] +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] "" - "* return output_ashl (insn, operands);") + "ix86_expand_binary_operator (ASHIFT, SImode, operands); DONE;") -(define_insn "" - [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q") - (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,q") - (match_operand:QI 2 "small_shift_operand" "M,M")))] - "! 
optimize_size" - "* return output_ashl (insn, operands);") +(define_insn "*ashlsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,r") + (match_operand:QI 2 "nonmemory_operand" "cI,M"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + if (!rtx_equal_p (operands[0], operands[1])) + abort (); + return "add{l}\t{%0, %0|%0, %0}"; -;; Generic left shift pattern to catch all cases not handled by the -;; shift pattern above. -(define_insn "" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") - (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "cI")))] - "" - "* return output_ashl (insn, operands);") + case TYPE_LEA: + return "#"; -;; See comment above `ashldi3' about how this works. + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. 
+(define_split + [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" ""))) + (clobber (reg:CC 17))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), + Pmode)); + pat = gen_rtx_MULT (Pmode, operands[1], operands[2]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +}) + +(define_insn "*ashlsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,r") + (match_operand:QI 2 "nonmemory_operand" "cI,M")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + return "add{l}\t{%k0, %k0|%k0, %k0}"; -(define_expand "ashrdi3" + case TYPE_LEA: + return "#"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{l}\t%k0"; + else + return "sal{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. 
+(define_split [(set (match_operand:DI 0 "register_operand" "") - (ashiftrt:DI (match_operand:DI 1 "register_operand" "") - (match_operand:QI 2 "nonmemory_operand" "")))] - "" - " + (zero_extend:DI (ashift (match_operand 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))) + (clobber (reg:CC 17))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) (zero_extend:DI (subreg:SI (mult:SI (match_dup 1) (match_dup 2)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = GEN_INT (trunc_int_for_mode (1 << INTVAL (operands[2]), + Pmode)); +}) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashlsi3_cmp" + [(set (reg 17) + (compare + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashift:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { - if (GET_CODE (operands[2]) != CONST_INT - || ! 
CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')) + switch (get_attr_type (insn)) { - operands[2] = copy_to_mode_reg (QImode, operands[2]); - emit_insn (gen_ashrdi3_non_const_int (operands[0], operands[1], - operands[2])); - } - else - emit_insn (gen_ashrdi3_const_int (operands[0], operands[1], operands[2])); - - DONE; -}") + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + return "add{l}\t{%0, %0|%0, %0}"; -(define_insn "ashldi3_32" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,m") - (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "ro,r") - (const_int 32)))] - "" - "* + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +(define_insn "*ashlsi3_cmp_zext" + [(set (reg 17) + (compare + (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { - rtx low[2], high[2], xops[4]; + switch (get_attr_type (insn)) + { + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + return "add{l}\t{%k0, %k0|%k0, %k0}"; - split_di (operands, 2, low, high); - xops[0] = high[0]; - xops[1] = low[1]; - xops[2] = low[0]; - xops[3] = const0_rtx; - if (!rtx_equal_p (xops[0], xops[1])) - output_asm_insn (AS2 (mov%L0,%1,%0), xops); + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, 
%k0|%k0, %b2}"; + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{l}\t%k0"; + else + return "sal{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) - if (GET_CODE (low[0]) == MEM) - output_asm_insn (AS2 (mov%L2,%3,%2), xops); - else - output_asm_insn (AS2 (xor%L2,%2,%2), xops); +(define_expand "ashlhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ASHIFT, HImode, operands); DONE;") - RET; -}") +(define_insn "*ashlhi3_1_lea" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,r") + (match_operand:QI 2 "nonmemory_operand" "cI,M"))) + (clobber (reg:CC 17))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + return "add{w}\t{%0, %0|%0, %0}"; -(define_insn "ashrdi3_const_int" - [(set (match_operand:DI 0 "register_operand" "=&r") - (ashiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_operand" "J")))] - "CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')" - "* + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne 
(symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI,SI")]) + +(define_insn "*ashlhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "cI"))) + (clobber (reg:CC 17))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" { - rtx xops[4], low[1], high[1]; + switch (get_attr_type (insn)) + { + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + return "add{w}\t{%0, %0|%0, %0}"; - CC_STATUS_INIT; + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashlhi3_cmp" + [(set (reg 17) + (compare + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashift:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + return "add{w}\t{%0, %0|%0, %0}"; - split_di (operands, 1, low, high); - xops[0] = operands[2]; - xops[1] = const1_rtx; - xops[2] = low[0]; - xops[3] = high[0]; + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) - if (INTVAL (xops[0]) > 31) +(define_expand "ashlqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ASHIFT, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? 
+ +(define_insn "*ashlqi3_1_lea" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,r") + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M"))) + (clobber (reg:CC 17))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) { - xops[1] = GEN_INT (31); - output_asm_insn (AS2 (mov%L2,%3,%2), xops); - output_asm_insn (AS2 (sar%L3,%1,%3), xops); /* shift by 32 */ + case TYPE_LEA: + return "#"; + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) + return "add{l}\t{%k0, %k0|%k0, %k0}"; + else + return "add{b}\t{%0, %0|%0, %0}"; - if (INTVAL (xops[0]) > 32) - { - xops[0] = GEN_INT (INTVAL (xops[0]) - 32); - output_asm_insn (AS2 (sar%L2,%0,%2), xops); /* Remaining shift */ + default: + if (REG_P (operands[2])) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else + return "sal{b}\t{%b2, %0|%0, %b2}"; + } + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t%0"; + else + return "sal{b}\t%0"; + } + else + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%2, %k0|%k0, %2}"; + else + return "sal{b}\t{%2, %0|%0, %2}"; } } - else +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI,SI,SI")]) + +(define_insn "*ashlqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "cI,cI"))) + (clobber (reg:CC 17))] + "TARGET_PARTIAL_REG_STALL 
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) { - output_asm_insn (AS3 (shrd%L2,%0,%3,%2), xops); - output_asm_insn (AS2 (sar%L3,%0,%3), xops); + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) + return "add{l}\t{%k0, %k0|%k0, %k0}"; + else + return "add{b}\t{%0, %0|%0, %0}"; + + default: + if (REG_P (operands[2])) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else + return "sal{b}\t{%b2, %0|%0, %b2}"; + } + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t%0"; + else + return "sal{b}\t%0"; + } + else + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%2, %k0|%k0, %2}"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI,SI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashlqi3_cmp" + [(set (reg 17) + (compare + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (ashift:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + if (operands[2] != const1_rtx) + abort (); + return "add{b}\t{%0, %0|%0, %0}"; - RET; -}") + default: + if (REG_P (operands[2])) + return "sal{b}\t{%b2, %0|%0, %b2}"; + else if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 1 + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)) + return "sal{b}\t%0"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI")]) + +;; See comment above `ashldi3' about how this works. + +(define_expand "ashrdi3" + [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "") + (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))])] + "" +{ + if (!TARGET_64BIT && TARGET_CMOVE && ! 
immediate_operand (operands[2], QImode)) + { + emit_insn (gen_ashrdi3_1 (operands[0], operands[1], operands[2])); + DONE; + } + ix86_expand_binary_operator (ASHIFTRT, DImode, operands); + DONE; +}) + +(define_insn "ashrdi3_63_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm") + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0") + (match_operand:DI 2 "const_int_operand" "i,i"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && INTVAL (operands[2]) == 63 && (TARGET_USE_CLTD || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "@ + {cqto|cqo} + sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "imovx,ishift") + (set_attr "prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "DI")]) + +(define_insn "*ashrdi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrdi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "J,c"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "@ + sar{q}\t{%2, %0|%0, %2} + sar{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashrdi3_one_bit_cmp_rex64" + [(set (reg 17) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrdi3_cmp_rex64" + [(set (reg 17) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "n")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + + +(define_insn "ashrdi3_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_CMOVE" + "#" + [(set_attr "type" "multi")]) -(define_insn "ashrdi3_non_const_int" - [(set (match_operand:DI 0 "register_operand" "=&r") +(define_insn "*ashrdi3_2" + [(set (match_operand:DI 0 "register_operand" "=r") (ashiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "register_operand" "c")))] + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "#" 
+ [(set_attr "type" "multi")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_CMOVE && reload_completed" + [(const_int 0)] + "ix86_split_ashrdi (operands, operands[3]); DONE;") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_ashrdi (operands, NULL_RTX); DONE;") + +(define_insn "x86_shrd_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m") + (ior:SI (ashiftrt:SI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "I,c")) + (ashift:SI (match_operand:SI 1 "register_operand" "r,r") + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC 17))] + "" + "@ + shrd{l}\t{%2, %1, %0|%0, %1, %2} + shrd{l}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "pent_pair" "np") + (set_attr "ppro_uops" "few") + (set_attr "mode" "SI")]) + +(define_expand "x86_shift_adj_3" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] "" - "* { - rtx xops[5], low[1], high[1]; + rtx label = gen_label_rtx (); + rtx tmp; - CC_STATUS_INIT; + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32))); - split_di (operands, 1, low, high); - xops[0] = operands[2]; - xops[1] = GEN_INT (32); - xops[2] = low[0]; - xops[3] = high[0]; - xops[4] = gen_label_rtx (); + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + 
JUMP_LABEL (tmp) = label; - output_asm_insn (AS3_SHIFT_DOUBLE (shrd%L2,%0,%3,%2), xops); - output_asm_insn (AS2 (sar%L3,%0,%3), xops); - output_asm_insn (AS2 (test%B0,%1,%b0), xops); - output_asm_insn (AS1 (je,%X4), xops); - xops[1] = GEN_INT (31); - output_asm_insn (AS2 (mov%L2,%3,%2), xops); - output_asm_insn (AS2 (sar%L3,%1,%3), xops); /* shift by 32 */ - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", - CODE_LABEL_NUMBER (xops[4])); - RET; -}") + emit_move_insn (operands[0], operands[1]); + emit_insn (gen_ashrsi3_31 (operands[1], operands[1], GEN_INT (31))); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) (define_insn "ashrsi3_31" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,d") - (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,a") - (const_int 31)))] - "!TARGET_PENTIUM || optimize_size" + [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0") + (match_operand:SI 2 "const_int_operand" "i,i"))) + (clobber (reg:CC 17))] + "INTVAL (operands[2]) == 31 && (TARGET_USE_CLTD || optimize_size) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" "@ - sar%L0 $31,%0 - cltd") + {cltd|cdq} + sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "imovx,ishift") + (set_attr "prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_31_zext" + [(set (match_operand:DI 0 "register_operand" "=*d,r") + (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0") + (match_operand:SI 2 "const_int_operand" "i,i")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && (TARGET_USE_CLTD || optimize_size) + && INTVAL (operands[2]) == 31 + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "@ + {cltd|cdq} + sar{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "imovx,ishift") + (set_attr "prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "SI")]) + 
+;; Expander for the SImode arithmetic right shift.  It only forwards
+;; its operands to ix86_expand_binary_operator (ASHIFTRT, SImode, ...).
+(define_expand "ashrsi3"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+        (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+                     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC 17))]
+  ""
+  "ix86_expand_binary_operator (ASHIFTRT, SImode, operands); DONE;")
 
-(define_insn "ashrsi3"
+;; SImode arithmetic right shift by the constant one, using the
+;; one-operand "sar{l}" form.  Enabled only for TARGET_PENTIUM or
+;; TARGET_PENTIUMPRO.  The "length" attribute is 2 when operand 0 is a
+;; register and keeps its default ("*") otherwise.
+(define_insn "*ashrsi3_1_one_bit"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
         (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-                     (match_operand:SI 2 "nonmemory_operand" "cI")))]
-  ""
-  "*
-{
-  if (REG_P (operands[2]))
-    return AS2 (sar%L0,%b2,%0);
-  else
-    return AS2 (sar%L0,%2,%0);
-}")
+                     (match_operand:QI 2 "const_int_1_operand" "")))
+   (clobber (reg:CC 17))]
+  "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+   && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+  "sar{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+        (const_string "2")
+        (const_string "*")))])
+
+;; As above, but the SImode result is zero-extended into a DImode
+;; register (TARGET_64BIT only); the template names operand 0 as %k0.
+(define_insn "*ashrsi3_1_one_bit_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+                                     (match_operand:QI 2 "const_int_1_operand" ""))))
+   (clobber (reg:CC 17))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
+   && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+  "sar{l}\t%k0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; General SImode arithmetic right shift.  Alternative 0 takes a count
+;; matching constraint "I"; alternative 1 takes the count in a register
+;; matching constraint "c" and prints it as %b2.
+(define_insn "*ashrsi3_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+        (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+                     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC 17))]
+  "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   sar{l}\t{%2, %0|%0, %2}
+   sar{l}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+;; Zero-extending variant of *ashrsi3_1 for TARGET_64BIT: the result
+;; lives in a DImode register and the template names it as %k0.
+(define_insn "*ashrsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+        (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0")
+                                     (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+   (clobber (reg:CC 17))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   sar{l}\t{%2, %k0|%k0, %2}
+   sar{l}\t{%b2, %k0|%k0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+;; The pattern sets (reg 17) from comparing the shifted value with
+;; zero and also stores the shifted value in operand 0.
+(define_insn "*ashrsi3_one_bit_cmp"
+  [(set (reg 17)
+        (compare
+          (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+                       (match_operand:QI 2 "const_int_1_operand" ""))
+          (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+        (ashiftrt:SI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+        (const_string "2")
+        (const_string "*")))])
+
+;; Zero-extending variant of *ashrsi3_one_bit_cmp for TARGET_64BIT.
+;; NOTE(review): this pattern originally requested CCmode from
+;; ix86_match_ccmode; it now requests CCGOCmode so that it agrees with
+;; the other shift-and-compare patterns in this file.  Confirm against
+;; ix86_match_ccmode before relying on the narrower match.
+(define_insn "*ashrsi3_one_bit_cmp_zext"
+  [(set (reg 17)
+        (compare
+          (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+                       (match_operand:QI 2 "const_int_1_operand" ""))
+          (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t%k0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrsi3_cmp" + [(set (reg 17) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashiftrt:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_cmp_zext" + [(set (reg 17) + (compare + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_expand "ashrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ASHIFTRT, HImode, operands); DONE;") -(define_insn "ashrhi3" +(define_insn "*ashrhi3_1_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:HI 2 "nonmemory_operand" "cI")))] - "" - "* -{ - if (REG_P (operands[2])) - return AS2 (sar%W0,%b2,%0); - else - return AS2 (sar%W0,%2,%0); -}") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ASHIFTRT, HImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrhi3_1" 
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "@ + sar{w}\t{%2, %0|%0, %2} + sar{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrhi3_one_bit_cmp" + [(set (reg 17) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_1_operand" "")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashiftrt:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashrhi3_cmp" + [(set (reg 17) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashiftrt:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_expand "ashrqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ASHIFTRT, QImode, operands); DONE;") -(define_insn "ashrqi3" +(define_insn "*ashrqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "cI")))] - "" - "* -{ - if (REG_P (operands[2])) - return AS2 (sar%B0,%b2,%0); - else - return AS2 (sar%B0,%2,%0); -}") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "@ + sar{b}\t{%2, %0|%0, %2} + sar{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. 
We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrqi3_one_bit_cmp" + [(set (reg 17) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_1_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=rm") + (ashiftrt:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrqi3_cmp" + [(set (reg 17) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=rm") + (ashiftrt:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) -;;- logical shift instructions +;; Logical shift instructions ;; See comment above `ashldi3' about how this works. (define_expand "lshrdi3" - [(set (match_operand:DI 0 "register_operand" "") - (lshiftrt:DI (match_operand:DI 1 "register_operand" "") - (match_operand:QI 2 "nonmemory_operand" "")))] + [(parallel [(set (match_operand:DI 0 "shiftdi_operand" "") + (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))])] "" - " { - if (GET_CODE (operands[2]) != CONST_INT - || ! CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')) + if (!TARGET_64BIT && TARGET_CMOVE && ! 
immediate_operand (operands[2], QImode)) { - operands[2] = copy_to_mode_reg (QImode, operands[2]); - emit_insn (gen_lshrdi3_non_const_int (operands[0], operands[1], - operands[2])); + emit_insn (gen_lshrdi3_1 (operands[0], operands[1], operands[2])); + DONE; } - else - emit_insn (gen_lshrdi3_const_int (operands[0], operands[1], operands[2])); - + ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE; -}") - -(define_insn "lshrdi3_32" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,m") - (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "ro,r") - (const_int 32)))] - "" - "* -{ - rtx low[2], high[2], xops[4]; - - split_di (operands, 2, low, high); - xops[0] = low[0]; - xops[1] = high[1]; - xops[2] = high[0]; - xops[3] = const0_rtx; - if (!rtx_equal_p (xops[0], xops[1])) - output_asm_insn (AS2 (mov%L0,%1,%0), xops); - - if (GET_CODE (low[0]) == MEM) - output_asm_insn (AS2 (mov%L2,%3,%2), xops); - else - output_asm_insn (AS2 (xor%L2,%2,%2), xops); - - RET; -}") - -(define_insn "lshrdi3_const_int" - [(set (match_operand:DI 0 "register_operand" "=&r") +}) + +(define_insn "*lshrdi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrdi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "J,c"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{q}\t{%2, %0|%0, %2} + shr{q}\t{%b2, %0|%0, %b2}" + [(set_attr 
"type" "ishift") + (set_attr "mode" "DI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrdi3_cmp_one_bit_rex64" + [(set (reg 17) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (lshiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*lshrdi3_cmp_rex64" + [(set (reg 17) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "e")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (lshiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_insn "lshrdi3_1" + [(set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_operand" "J")))] - "CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'J')" - "* -{ - rtx xops[4], low[1], high[1]; - - CC_STATUS_INIT; - - split_di (operands, 1, low, high); - xops[0] = operands[2]; - xops[1] = const1_rtx; - xops[2] = low[0]; - xops[3] = high[0]; - - if (INTVAL (xops[0]) > 31) - { - output_asm_insn (AS2 (mov%L2,%3,%2), xops); /* Fast shift by 32 */ - output_asm_insn (AS2 (xor%L3,%3,%3), xops); - - if (INTVAL (xops[0]) > 32) - { - xops[0] = GEN_INT (INTVAL (xops[0]) - 32); - output_asm_insn (AS2 (shr%L2,%0,%2), xops); /* Remaining shift */ - } - } - else - { - output_asm_insn (AS3 (shrd%L2,%0,%3,%2), xops); - output_asm_insn (AS2 (shr%L3,%0,%3), xops); - } - - RET; -}") + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (match_scratch:SI 3 "=&r")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_CMOVE" + "#" + [(set_attr "type" "multi")]) -(define_insn "lshrdi3_non_const_int" - [(set (match_operand:DI 0 "register_operand" "=&r") +(define_insn "*lshrdi3_2" + [(set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (match_operand:DI 1 "register_operand" "0") - (match_operand:QI 2 "register_operand" "c")))] - "" - "* -{ - rtx xops[5], low[1], high[1]; + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) - 
CC_STATUS_INIT; +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_CMOVE && reload_completed" + [(const_int 0)] + "ix86_split_lshrdi (operands, operands[3]); DONE;") - split_di (operands, 1, low, high); - xops[0] = operands[2]; - xops[1] = GEN_INT (32); - xops[2] = low[0]; - xops[3] = high[0]; - xops[4] = gen_label_rtx (); +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && reload_completed" + [(const_int 0)] + "ix86_split_lshrdi (operands, NULL_RTX); DONE;") - output_asm_insn (AS3_SHIFT_DOUBLE (shrd%L2,%0,%3,%2), xops); - output_asm_insn (AS2 (shr%L3,%0,%3), xops); - output_asm_insn (AS2 (test%B0,%1,%b0), xops); - output_asm_insn (AS1 (je,%X4), xops); - output_asm_insn (AS2 (mov%L2,%3,%2), xops); /* Fast shift by 32 */ - output_asm_insn (AS2 (xor%L3,%3,%3), xops); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", - CODE_LABEL_NUMBER (xops[4])); - RET; -}") +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "" + "ix86_expand_binary_operator (LSHIFTRT, SImode, operands); DONE;") -(define_insn "lshrsi3" +(define_insn "*lshrsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "cI")))] - "" - "* -{ - if (REG_P (operands[2])) - return AS2 (shr%L0,%b2,%0); - else - return AS2 (shr%L0,%2,%1); -}") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, 
operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "shr{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0")) + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "shr{l}\t%k0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +(define_insn "*lshrsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{l}\t{%2, %0|%0, %2} + shr{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*lshrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{l}\t{%2, %k0|%k0, %2} + shr{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+;; SImode logical right shift by the constant one that also sets
+;; (reg 17) from comparing the shifted value with zero.  Uses the
+;; one-operand "shr{l}" form and is enabled only for TARGET_PENTIUM or
+;; TARGET_PENTIUMPRO.  The "length" attribute is 2 when operand 0 is a
+;; register and keeps its default ("*") otherwise.
+;; ix86_binary_operator_ok is given SImode, the mode of the shift,
+;; as the ashrsi3 patterns do (this pattern used to pass HImode).
+(define_insn "*lshrsi3_one_bit_cmp"
+  [(set (reg 17)
+        (compare
+          (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+                       (match_operand:QI 2 "const_int_1_operand" ""))
+          (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+        (lshiftrt:SI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand:SI 0 "register_operand" "")
+        (const_string "2")
+        (const_string "*")))])
+
+;; TARGET_64BIT variant of the pattern above: operand 0 is a DImode
+;; register set to the logical right shift of the zero-extended SImode
+;; operand 1.  The template names operand 0 as %k0.
+(define_insn "*lshrsi3_cmp_one_bit_zext"
+  [(set (reg 17)
+        (compare
+          (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+                       (match_operand:QI 2 "const_int_1_operand" ""))
+          (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t%k0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+;; SImode logical right shift by an immediate count (constraint "I")
+;; that also sets (reg 17) from comparing the shifted value with zero.
+;; ix86_binary_operator_ok is given SImode, the mode of the shift
+;; (this pattern used to pass HImode).
+(define_insn "*lshrsi3_cmp"
+  [(set (reg 17)
+        (compare
+          (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+                       (match_operand:QI 2 "immediate_operand" "I"))
+          (const_int 0)))
+   (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+        (lshiftrt:SI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+;; TARGET_64BIT variant of *lshrsi3_cmp: operand 0 is a DImode register
+;; set to the logical right shift of the zero-extended SImode
+;; operand 1.  The template names operand 0 as %k0.
+(define_insn "*lshrsi3_cmp_zext"
+  [(set (reg 17)
+        (compare
+          (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+                       (match_operand:QI 2 "immediate_operand" "I"))
+          (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
+;; Expander for the HImode logical right shift, available only when
+;; TARGET_HIMODE_MATH holds.  It only forwards its operands to
+;; ix86_expand_binary_operator (LSHIFTRT, HImode, ...).
+(define_expand "lshrhi3"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+        (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
+                     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (reg:CC 17))]
+  "TARGET_HIMODE_MATH"
+  "ix86_expand_binary_operator (LSHIFTRT, HImode, operands); DONE;")
 
-(define_insn "lshrhi3"
+;; HImode logical right shift by the constant one, using the
+;; one-operand "shr{w}" form.  Enabled only for TARGET_PENTIUM or
+;; TARGET_PENTIUMPRO.  The "length" attribute is 2 when operand 0 is a
+;; register and keeps its default ("*") otherwise.
+(define_insn "*lshrhi3_1_one_bit"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
         (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-                     (match_operand:HI 2 "nonmemory_operand" "cI")))]
-  ""
-  "*
-{
-  if (REG_P (operands[2]))
-    return AS2 (shr%W0,%b2,%0);
-  else
-    return AS2 (shr%W0,%2,%0);
-}")
+                     (match_operand:QI 2 "const_int_1_operand" "")))
+   (clobber (reg:CC 17))]
+  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
+   && (TARGET_PENTIUM || TARGET_PENTIUMPRO)"
+  "shr{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+        (const_string "2")
+        (const_string "*")))])
+
+;; General HImode logical right shift.  Alternative 0 takes a count
+;; matching constraint "I"; alternative 1 takes the count in a register
+;; matching constraint "c" and prints it as %b2.
+(define_insn "*lshrhi3_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+        (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+                     (match_operand:QI 2 "nonmemory_operand" "I,c")))
+   (clobber (reg:CC 17))]
+  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "@
+   shr{w}\t{%2, %0|%0, %2}
+   shr{w}\t{%b2, %0|%0, %b2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+;; The "length" test uses a modeless match_operand, as in
+;; *lshrhi3_1_one_bit: operand 0 is HImode here, so the :SI test this
+;; pattern used to carry did not name the operand's mode.
+(define_insn "*lshrhi3_one_bit_cmp"
+  [(set (reg 17)
+        (compare
+          (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+                       (match_operand:QI 2 "const_int_1_operand" ""))
+          (const_int 0)))
+   (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+        (lshiftrt:HI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_PENTIUM || TARGET_PENTIUMPRO)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "shr{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set (attr "length")
+     (if_then_else (match_operand 0 "register_operand" "")
+        (const_string "2")
+        (const_string "*")))])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrhi3_cmp" + [(set (reg 17) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (lshiftrt:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_expand "lshrqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (LSHIFTRT, QImode, operands); DONE;") -(define_insn "lshrqi3" +(define_insn "*lshrqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "cI")))] - "" - "* -{ - if (REG_P (operands[2])) - return AS2 (shr%B0,%b2,%0); - else - return AS2 (shr%B0,%2,%0); -}") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "@ + shr{b}\t{%2, %0|%0, %2} + shr{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. 
We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrqi2_one_bit_cmp" + [(set (reg 17) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_1_operand" "")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (lshiftrt:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrqi2_cmp" + [(set (reg 17) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "immediate_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (lshiftrt:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) -;;- rotate instructions +;; Rotate instructions -(define_insn "rotlsi3" +(define_expand "rotldi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "ix86_expand_binary_operator (ROTATE, DImode, operands); DONE;") + +(define_insn "*rotlsi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands) + && 
(TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "rol{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotldi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "e,c"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)" + "@ + rol{q}\t{%2, %0|%0, %2} + rol{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "" + "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;") + +(define_insn "*rotlsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "cI")))] - "" - "* -{ - if (REG_P (operands[2])) - return AS2 (rol%L0,%b2,%0); - else - return AS2 (rol%L0,%2,%0); -}") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATE, SImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "rol{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_int_1_operand" "")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "rol{l}\t%k0" + [(set_attr "type" "ishift") + 
(set_attr "length" "2")]) + +(define_insn "*rotlsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATE, SImode, operands)" + "@ + rol{l}\t{%2, %0|%0, %2} + rol{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*rotlsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)" + "@ + rol{l}\t{%2, %k0|%k0, %2} + rol{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_expand "rotlhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;") -(define_insn "rotlhi3" +(define_insn "*rotlhi3_1_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:HI 2 "nonmemory_operand" "cI")))] - "" - "* -{ - if (REG_P (operands[2])) - return AS2 (rol%W0,%b2,%0); - else - return AS2 (rol%W0,%2,%0); -}") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATE, HImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "rol{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") 
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATE, HImode, operands)" + "@ + rol{w}\t{%2, %0|%0, %2} + rol{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) -(define_insn "rotlqi3" +(define_expand "rotlqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;") + +(define_insn "*rotlqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "cI")))] + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATE, QImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "rol{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATE, QImode, operands)" + "@ + rol{b}\t{%2, %0|%0, %2} + rol{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_expand "rotrdi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "ix86_expand_binary_operator (ROTATERT, DImode, operands); DONE;") + +(define_insn "*rotrdi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0") + 
(match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "ror{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrdi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "J,c"))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)" + "@ + ror{q}\t{%2, %0|%0, %2} + ror{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] "" - "* -{ - if (REG_P (operands[2])) - return AS2 (rol%B0,%b2,%0); - else - return AS2 (rol%B0,%2,%0); -}") + "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;") -(define_insn "rotrsi3" +(define_insn "*rotrsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "cI")))] - "" - "* -{ - if (REG_P (operands[2])) - return AS2 (ror%L0,%b2,%0); - else - return AS2 (ror%L0,%2,%0); -}") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATERT, SImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "ror{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (rotatert:SI 
(match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_int_1_operand" "")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "ror{l}\t%k0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATERT, SImode, operands)" + "@ + ror{l}\t{%2, %0|%0, %2} + ror{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*rotrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (rotatert:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC 17))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)" + "@ + ror{l}\t{%2, %k0|%k0, %2} + ror{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_expand "rotrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;") -(define_insn "rotrhi3" +(define_insn "*rotrhi3_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:HI 2 "nonmemory_operand" "cI")))] - "" - "* -{ - if (REG_P (operands[2])) - return AS2 (ror%W0,%b2,%0); - else - return AS2 (ror%W0,%2,%0); -}") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + 
"ix86_binary_operator_ok (ROTATERT, HImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "ror{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATERT, HImode, operands)" + "@ + ror{w}\t{%2, %0|%0, %2} + ror{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_expand "rotrqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC 17))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;") -(define_insn "rotrqi3" +(define_insn "*rotrqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "cI")))] - "" - "* -{ - if (REG_P (operands[2])) - return AS2 (ror%B0,%b2,%0); - else - return AS2 (ror%B0,%2,%0); -}") + (match_operand:QI 2 "const_int_1_operand" ""))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATERT, QImode, operands) + && (TARGET_PENTIUM || TARGET_PENTIUMPRO)" + "ror{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (ROTATERT, QImode, operands)" + "@ + ror{b}\t{%2, %0|%0, %2} + ror{b}\t{%b2, 
%0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) -/* -;; This usually looses. But try a define_expand to recognize a few case -;; we can do efficiently, such as accessing the "high" QImode registers, -;; %ah, %bh, %ch, %dh. -;; ??? Note this has a botch on the mode of operand 0, which needs to be -;; fixed if this is ever enabled. -(define_insn "insv" - [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+&r") - (match_operand:SI 1 "immediate_operand" "i") - (match_operand:SI 2 "immediate_operand" "i")) - (match_operand:SI 3 "nonmemory_operand" "ri"))] - "" - "* -{ - if (INTVAL (operands[1]) + INTVAL (operands[2]) > GET_MODE_BITSIZE (SImode)) - abort (); - if (GET_CODE (operands[3]) == CONST_INT) - { - unsigned int mask = (1 << INTVAL (operands[1])) - 1; - operands[1] = GEN_INT (~(mask << INTVAL (operands[2]))); - output_asm_insn (AS2 (and%L0,%1,%0), operands); - operands[3] = GEN_INT (INTVAL (operands[3]) << INTVAL (operands[2])); - output_asm_insn (AS2 (or%L0,%3,%0), operands); - } - else - { - operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); - if (INTVAL (operands[2])) - output_asm_insn (AS2 (ror%L0,%2,%0), operands); - output_asm_insn (AS3 (shrd%L0,%1,%3,%0), operands); - operands[2] = GEN_INT (BITS_PER_WORD - - INTVAL (operands[1]) - INTVAL (operands[2])); - if (INTVAL (operands[2])) - output_asm_insn (AS2 (ror%L0,%2,%0), operands); - } - RET; -}") -*/ -/* -;; ??? There are problems with the mode of operand[3]. The point of this -;; is to represent an HImode move to a "high byte" register. 
+;; Bit set / bit test instructions -(define_expand "insv" - [(set (zero_extract:SI (match_operand:SI 0 "general_operand" "") - (match_operand:SI 1 "immediate_operand" "") - (match_operand:SI 2 "immediate_operand" "")) - (match_operand:QI 3 "nonmemory_operand" "ri"))] +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand:SI 3 "immediate_operand" "")))] "" - " { - if (GET_CODE (operands[1]) != CONST_INT - || GET_CODE (operands[2]) != CONST_INT) + /* Handle extractions from %ah et al. */ + if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8) FAIL; - if (! (INTVAL (operands[1]) == 8 - && (INTVAL (operands[2]) == 8 || INTVAL (operands[2]) == 0)) - && ! INTVAL (operands[1]) == 1) + /* From mips.md: extract_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! register_operand (operands[1], VOIDmode)) FAIL; -}") -*/ - -;; On i386, the register count for a bit operation is *not* truncated, -;; so SHIFT_COUNT_TRUNCATED must not be defined. - -;; On i486, the shift & or/and code is faster than bts or btr. If -;; operands[0] is a MEM, the bt[sr] is half as fast as the normal code. - -;; On i386, bts is a little faster if operands[0] is a reg, and a -;; little slower if operands[0] is a MEM, than the shift & or/and code. -;; Use bts & btr, since they reload better. - -;; General bit set and clear. -(define_insn "" - [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+rm") - (const_int 1) - (match_operand:SI 2 "register_operand" "r")) - (match_operand:SI 3 "const_int_operand" "n"))] - "TARGET_USE_BIT_TEST && GET_CODE (operands[2]) != CONST_INT" - "* -{ - CC_STATUS_INIT; - - if (INTVAL (operands[3]) == 1) - return AS2 (bts%L0,%2,%0); - else - return AS2 (btr%L0,%2,%0); -}") - -;; Bit complement. See comments on previous pattern. -;; ??? Is this really worthwhile? 
-(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") - (xor:SI (ashift:SI (const_int 1) - (match_operand:SI 1 "register_operand" "r")) - (match_operand:SI 2 "nonimmediate_operand" "0")))] - "TARGET_USE_BIT_TEST && GET_CODE (operands[1]) != CONST_INT" - "* -{ - CC_STATUS_INIT; - - return AS2 (btc%L0,%1,%0); -}") +}) -(define_insn "" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") - (xor:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (ashift:SI (const_int 1) - (match_operand:SI 2 "register_operand" "r"))))] - "TARGET_USE_BIT_TEST && GET_CODE (operands[2]) != CONST_INT" - "* -{ - CC_STATUS_INIT; - - return AS2 (btc%L0,%2,%0); -}") - -;; Recognizers for bit-test instructions. - -;; The bt opcode allows a MEM in operands[0]. But on both i386 and -;; i486, it is faster to copy a MEM to REG and then use bt, than to use -;; bt on the MEM directly. - -;; ??? The first argument of a zero_extract must not be reloaded, so -;; don't allow a MEM in the operand predicate without allowing it in the -;; constraint. - -(define_insn "" - [(set (cc0) (zero_extract (match_operand:SI 0 "register_operand" "r") - (const_int 1) - (match_operand:SI 1 "register_operand" "r")))] - "GET_CODE (operands[1]) != CONST_INT" - "* -{ - cc_status.flags |= CC_Z_IN_NOT_C; - return AS2 (bt%L0,%1,%0); -}") - -(define_insn "" - [(set (cc0) (zero_extract (match_operand:SI 0 "register_operand" "r") - (match_operand:SI 1 "const_int_operand" "n") - (match_operand:SI 2 "const_int_operand" "n")))] +(define_expand "extzv" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand 1 "ext_register_operand" "") + (match_operand:SI 2 "immediate_operand" "") + (match_operand:SI 3 "immediate_operand" "")))] "" - "* { - unsigned int mask; - - mask = ((1 << INTVAL (operands[1])) - 1) << INTVAL (operands[2]); - operands[1] = GEN_INT (mask); - - if (QI_REG_P (operands[0]) - /* A Pentium test is pairable only with eax. Not with ah or al. */ - && (! 
REG_P (operands[0]) || REGNO (operands[0]) || !TARGET_PENTIUM - || optimize_size)) - { - if ((mask & ~0xff) == 0) - { - cc_status.flags |= CC_NOT_NEGATIVE; - return AS2 (test%B0,%1,%b0); - } - - if ((mask & ~0xff00) == 0) - { - cc_status.flags |= CC_NOT_NEGATIVE; - operands[1] = GEN_INT (mask >> 8); - return AS2 (test%B0,%1,%h0); - } - } - - return AS2 (test%L0,%1,%0); -}") + /* Handle extractions from %ah et al. */ + if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8) + FAIL; -;; ??? All bets are off if operand 0 is a volatile MEM reference. -;; The CPU may access unspecified bytes around the actual target byte. + /* From mips.md: extract_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! register_operand (operands[1], VOIDmode)) + FAIL; +}) -(define_insn "" - [(set (cc0) (zero_extract (match_operand:QI 0 "memory_operand" "m") - (match_operand:SI 1 "const_int_operand" "n") - (match_operand:SI 2 "const_int_operand" "n")))] - "GET_CODE (operands[0]) != MEM || ! MEM_VOLATILE_P (operands[0])" - "* +(define_expand "insv" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "")) + (match_operand:SI 3 "register_operand" ""))] + "" { - unsigned int mask; - - mask = ((1 << INTVAL (operands[1])) - 1) << INTVAL (operands[2]); - operands[1] = GEN_INT (mask); - - if ((! REG_P (operands[0]) || QI_REG_P (operands[0])) - /* A Pentium test is pairable only with eax. Not with ah or al. */ - && (! 
REG_P (operands[0]) || REGNO (operands[0]) || !TARGET_PENTIUM - || optimize_size)) - { - if ((mask & ~0xff) == 0) - { - cc_status.flags |= CC_NOT_NEGATIVE; - return AS2 (test%B0,%1,%b0); - } - - if ((mask & ~0xff00) == 0) - { - cc_status.flags |= CC_NOT_NEGATIVE; - operands[1] = GEN_INT (mask >> 8); - - if (QI_REG_P (operands[0])) - return AS2 (test%B0,%1,%h0); - else - { - operands[0] = adj_offsettable_operand (operands[0], 1); - return AS2 (test%B0,%1,%b0); - } - } - - if (GET_CODE (operands[0]) == MEM && (mask & ~0xff0000) == 0) - { - cc_status.flags |= CC_NOT_NEGATIVE; - operands[1] = GEN_INT (mask >> 16); - operands[0] = adj_offsettable_operand (operands[0], 2); - return AS2 (test%B0,%1,%b0); - } - - if (GET_CODE (operands[0]) == MEM && (mask & ~0xff000000) == 0) - { - cc_status.flags |= CC_NOT_NEGATIVE; - operands[1] = GEN_INT (mask >> 24); - operands[0] = adj_offsettable_operand (operands[0], 3); - return AS2 (test%B0,%1,%b0); - } - } + /* Handle extractions from %ah et al. */ + if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8) + FAIL; - if (CONSTANT_P (operands[1]) || GET_CODE (operands[0]) == MEM) - return AS2 (test%L0,%1,%0); + /* From mips.md: insert_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! register_operand (operands[0], VOIDmode)) + FAIL; +}) - return AS2 (test%L1,%0,%1); -}") +;; %%% bts, btr, btc, bt. ;; Store-flag instructions. ;; For all sCOND expanders, also expand the compare or test insn that ;; generates cc0. Generate an equality comparison if `seq' or `sne'. +;; %%% Do the expansion to SImode. If PII, do things the xor+setcc way +;; to avoid partial register stalls. Otherwise do things the setcc+movzx +;; way, which can later delete the movzx if only QImode is needed. 
+ (define_expand "seq" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (eq:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (eq:QI (reg:CC 17) (const_int 0)))] "" - " -{ - if (TARGET_IEEE_FP - && GET_MODE_CLASS (GET_MODE (i386_compare_op0)) == MODE_FLOAT) - operands[1] = (*i386_compare_gen_eq)(i386_compare_op0, i386_compare_op1); - else - operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1); -}") + "if (ix86_expand_setcc (EQ, operands[0])) DONE; else FAIL;") (define_expand "sne" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (ne:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (ne:QI (reg:CC 17) (const_int 0)))] "" - " -{ - if (TARGET_IEEE_FP - && GET_MODE_CLASS (GET_MODE (i386_compare_op0)) == MODE_FLOAT) - operands[1] = (*i386_compare_gen_eq)(i386_compare_op0, i386_compare_op1); - else - operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1); -}") + "if (ix86_expand_setcc (NE, operands[0])) DONE; else FAIL;") (define_expand "sgt" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (gt:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (gt:QI (reg:CC 17) (const_int 0)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "if (ix86_expand_setcc (GT, operands[0])) DONE; else FAIL;") (define_expand "sgtu" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (gtu:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (gtu:QI (reg:CC 17) (const_int 0)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "if (ix86_expand_setcc (GTU, operands[0])) DONE; else FAIL;") (define_expand "slt" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (lt:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (lt:QI (reg:CC 17) (const_int 0)))] "" - "operands[1] = 
(*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "if (ix86_expand_setcc (LT, operands[0])) DONE; else FAIL;") (define_expand "sltu" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (ltu:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (ltu:QI (reg:CC 17) (const_int 0)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "if (ix86_expand_setcc (LTU, operands[0])) DONE; else FAIL;") (define_expand "sge" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (ge:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (ge:QI (reg:CC 17) (const_int 0)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "if (ix86_expand_setcc (GE, operands[0])) DONE; else FAIL;") (define_expand "sgeu" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (geu:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (geu:QI (reg:CC 17) (const_int 0)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "if (ix86_expand_setcc (GEU, operands[0])) DONE; else FAIL;") (define_expand "sle" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (le:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (le:QI (reg:CC 17) (const_int 0)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "if (ix86_expand_setcc (LE, operands[0])) DONE; else FAIL;") (define_expand "sleu" - [(match_dup 1) - (set (match_operand:QI 0 "register_operand" "") - (leu:QI (cc0) (const_int 0)))] + [(set (match_operand:QI 0 "register_operand" "") + (leu:QI (reg:CC 17) (const_int 0)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "if (ix86_expand_setcc (LEU, operands[0])) DONE; else FAIL;") -;; The 386 sCOND opcodes can write to memory. But a gcc sCOND insn may -;; not have any input reloads. 
A MEM write might need an input reload -;; for the address of the MEM. So don't allow MEM as the SET_DEST. +(define_expand "sunordered" + [(set (match_operand:QI 0 "register_operand" "") + (unordered:QI (reg:CC 17) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNORDERED, operands[0])) DONE; else FAIL;") -(define_insn "*setcc" +(define_expand "sordered" + [(set (match_operand:QI 0 "register_operand" "") + (ordered:QI (reg:CC 17) (const_int 0)))] + "TARGET_80387" + "if (ix86_expand_setcc (ORDERED, operands[0])) DONE; else FAIL;") + +(define_expand "suneq" + [(set (match_operand:QI 0 "register_operand" "") + (uneq:QI (reg:CC 17) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNEQ, operands[0])) DONE; else FAIL;") + +(define_expand "sunge" + [(set (match_operand:QI 0 "register_operand" "") + (unge:QI (reg:CC 17) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNGE, operands[0])) DONE; else FAIL;") + +(define_expand "sungt" + [(set (match_operand:QI 0 "register_operand" "") + (ungt:QI (reg:CC 17) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNGT, operands[0])) DONE; else FAIL;") + +(define_expand "sunle" + [(set (match_operand:QI 0 "register_operand" "") + (unle:QI (reg:CC 17) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNLE, operands[0])) DONE; else FAIL;") + +(define_expand "sunlt" + [(set (match_operand:QI 0 "register_operand" "") + (unlt:QI (reg:CC 17) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (UNLT, operands[0])) DONE; else FAIL;") + +(define_expand "sltgt" + [(set (match_operand:QI 0 "register_operand" "") + (ltgt:QI (reg:CC 17) (const_int 0)))] + "TARGET_80387 || TARGET_SSE" + "if (ix86_expand_setcc (LTGT, operands[0])) DONE; else FAIL;") + +(define_insn "*setcc_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") - (match_operator:QI 1 "comparison_operator" [(cc0) (const_int 0)]))] - 
"reload_completed || register_operand (operands[0], QImode)" - "* + (match_operator:QI 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]))] + "" + "set%C1\t%0" + [(set_attr "type" "setcc") + (set_attr "mode" "QI")]) + +(define_insn "setcc_2" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (match_operator:QI 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]))] + "" + "set%C1\t%0" + [(set_attr "type" "setcc") + (set_attr "mode" "QI")]) + +;; In general it is not safe to assume too much about CCmode registers, +;; so simplify-rtx stops when it sees a second one. Under certain +;; conditions this is safe on x86, so help combine not create +;; +;; seta %al +;; testb %al, %al +;; sete %al + +(define_split + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ne:QI (match_operator 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] { - enum rtx_code code = GET_CODE (operands[1]); - if (cc_prev_status.flags & CC_TEST_AX) - { - int eq; - HOST_WIDE_INT c; - operands[2] = gen_rtx_REG (SImode, 0); - switch (code) - { - case EQ: - c = 0x4000; - eq = 0; - break; - case NE: - c = 0x4000; - eq = 1; - break; - case GT: - c = 0x4100; - eq = 1; - break; - case LT: - c = 0x100; - eq = 0; - break; - case GE: - c = 0x100; - eq = 1; - break; - case LE: - c = 0x4100; - eq = 0; - break; - default: - abort (); - } - if (!TARGET_PENTIUM || optimize_size) - { - operands[3] = GEN_INT (c >> 8); - output_asm_insn (AS2 (test%B0,%3,%h2), operands); - } - else - { - operands[3] = GEN_INT (c); - output_asm_insn (AS2 (test%L0,%3,%2), operands); - } - return eq ? 
AS1 (sete,%0) : AS1 (setne, %0); - } + PUT_MODE (operands[1], QImode); +}) - if ((cc_status.flags & CC_NO_OVERFLOW) && (code == LE || code == GT)) - return (char *)0; - return AS1(set%D1,%0); -}") +(define_split + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) + (ne:QI (match_operator 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] +{ + PUT_MODE (operands[1], QImode); +}) +(define_split + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (eq:QI (match_operator 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] +{ + rtx new_op1 = copy_rtx (operands[1]); + operands[1] = new_op1; + PUT_MODE (new_op1, QImode); + PUT_CODE (new_op1, REVERSE_CONDITION (GET_CODE (new_op1), + GET_MODE (XEXP (new_op1, 0)))); + + /* Make sure that (a) the CCmode we have for the flags is strong + enough for the reversed compare or (b) we have a valid FP compare. */ + if (! ix86_comparison_operator (new_op1, VOIDmode)) + FAIL; +}) + +(define_split + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) + (eq:QI (match_operator 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) + (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1))] +{ + rtx new_op1 = copy_rtx (operands[1]); + operands[1] = new_op1; + PUT_MODE (new_op1, QImode); + PUT_CODE (new_op1, REVERSE_CONDITION (GET_CODE (new_op1), + GET_MODE (XEXP (new_op1, 0)))); + + /* Make sure that (a) the CCmode we have for the flags is strong + enough for the reversed compare or (b) we have a valid FP compare. */ + if (! ix86_comparison_operator (new_op1, VOIDmode)) + FAIL; +}) + +;; The SSE store flag instructions saves 0 or 0xffffffff to the result. +;; subsequent logical operations are used to imitate conditional moves. +;; 0xffffffff is NaN, but not in normalized form, so we can't represent +;; it directly. 
Further, holding this value in a pseudo register might cause +;; problems with implicit normalization in spill code. +;; So we don't define FLOAT_STORE_FLAG_VALUE and create these +;; instructions after reload by splitting the conditional move patterns. + +(define_insn "*sse_setccsf" + [(set (match_operand:SF 0 "register_operand" "=x") + (match_operator:SF 1 "sse_comparison_operator" + [(match_operand:SF 2 "register_operand" "0") + (match_operand:SF 3 "nonimmediate_operand" "xm")]))] + "TARGET_SSE && reload_completed" + "cmp%D1ss\t{%3, %0|%0, %3}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +(define_insn "*sse_setccdf" + [(set (match_operand:DF 0 "register_operand" "=Y") + (match_operator:DF 1 "sse_comparison_operator" + [(match_operand:DF 2 "register_operand" "0") + (match_operand:DF 3 "nonimmediate_operand" "Ym")]))] + "TARGET_SSE2 && reload_completed" + "cmp%D1sd\t{%3, %0|%0, %3}" + [(set_attr "type" "sse") + (set_attr "mode" "DF")]) ;; Basic conditional jump instructions. ;; We ignore the overflow flag for signed branch instructions. ;; For all bCOND expanders, also expand the compare or test insn that -;; generates cc0. Generate an equality comparison if `beq' or `bne'. +;; generates reg 17. Generate an equality comparison if `beq' or `bne'. 
(define_expand "beq" - [(match_dup 1) - (set (pc) - (if_then_else (eq (cc0) - (const_int 0)) + [(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - " -{ - if (TARGET_IEEE_FP - && GET_MODE_CLASS (GET_MODE (i386_compare_op0)) == MODE_FLOAT) - operands[1] = (*i386_compare_gen_eq)(i386_compare_op0, i386_compare_op1); - else - operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1); -}") + "ix86_expand_branch (EQ, operands[0]); DONE;") (define_expand "bne" - [(match_dup 1) - (set (pc) - (if_then_else (ne (cc0) - (const_int 0)) + [(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - " -{ - if (TARGET_IEEE_FP - && GET_MODE_CLASS (GET_MODE (i386_compare_op0)) == MODE_FLOAT) - operands[1] = (*i386_compare_gen_eq)(i386_compare_op0, i386_compare_op1); - else - operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1); -}") - + "ix86_expand_branch (NE, operands[0]); DONE;") (define_expand "bgt" - [(match_dup 1) - (set (pc) - (if_then_else (gt (cc0) - (const_int 0)) + [(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "ix86_expand_branch (GT, operands[0]); DONE;") (define_expand "bgtu" - [(match_dup 1) - (set (pc) - (if_then_else (gtu (cc0) - (const_int 0)) + [(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "ix86_expand_branch (GTU, operands[0]); DONE;") (define_expand "blt" - [(match_dup 1) - (set (pc) - (if_then_else (lt (cc0) - (const_int 0)) + [(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") - + "ix86_expand_branch (LT, operands[0]); DONE;") (define_expand "bltu" - [(match_dup 1) - (set (pc) - (if_then_else (ltu (cc0) - (const_int 0)) + 
[(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "ix86_expand_branch (LTU, operands[0]); DONE;") (define_expand "bge" - [(match_dup 1) - (set (pc) - (if_then_else (ge (cc0) - (const_int 0)) + [(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "ix86_expand_branch (GE, operands[0]); DONE;") (define_expand "bgeu" - [(match_dup 1) - (set (pc) - (if_then_else (geu (cc0) - (const_int 0)) + [(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "ix86_expand_branch (GEU, operands[0]); DONE;") (define_expand "ble" - [(match_dup 1) - (set (pc) - (if_then_else (le (cc0) - (const_int 0)) + [(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "ix86_expand_branch (LE, operands[0]); DONE;") (define_expand "bleu" - [(match_dup 1) - (set (pc) - (if_then_else (leu (cc0) - (const_int 0)) + [(set (pc) + (if_then_else (match_dup 1) (label_ref (match_operand 0 "" "")) (pc)))] "" - "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") + "ix86_expand_branch (LEU, operands[0]); DONE;") -(define_insn "" +(define_expand "bunordered" [(set (pc) - (if_then_else (match_operator 0 "comparison_operator" - [(cc0) (const_int 0)]) - (label_ref (match_operand 1 "" "")) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) (pc)))] - "" - "* -{ - enum rtx_code code = GET_CODE (operands[0]); - if (cc_prev_status.flags & CC_TEST_AX) - { - int eq; - HOST_WIDE_INT c; - operands[2] = gen_rtx_REG (SImode, 0); - switch (code) - { - case EQ: - c = 0x4000; - eq = 0; - break; - case NE: - c = 0x4000; - eq = 1; - break; - case GT: - 
c = 0x4100; - eq = 1; - break; - case LT: - c = 0x100; - eq = 0; - break; - case GE: - c = 0x100; - eq = 1; - break; - case LE: - c = 0x4100; - eq = 0; - break; - default: - abort (); - } - if (!TARGET_PENTIUM || optimize_size) - { - operands[3] = GEN_INT (c >> 8); - output_asm_insn (AS2 (test%B0,%3,%h2), operands); - } - else - { - operands[3] = GEN_INT (c); - output_asm_insn (AS2 (test%L0,%3,%2), operands); - } - return eq ? AS1 (je,%l1) : AS1 (jne, %l1); - } - if ((cc_status.flags & CC_NO_OVERFLOW) && (code == LE || code == GT)) - return (char *)0; + "TARGET_80387 || TARGET_SSE" + "ix86_expand_branch (UNORDERED, operands[0]); DONE;") - return AS1(j%D0,%l1); -}") +(define_expand "bordered" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE" + "ix86_expand_branch (ORDERED, operands[0]); DONE;") -(define_insn "" +(define_expand "buneq" [(set (pc) - (if_then_else (match_operator 0 "comparison_operator" - [(cc0) (const_int 0)]) - (pc) - (label_ref (match_operand 1 "" ""))))] - "" - "* -{ - enum rtx_code code = GET_CODE (operands[0]); - if (cc_prev_status.flags & CC_TEST_AX) - { - int eq; - HOST_WIDE_INT c; - operands[2] = gen_rtx_REG (SImode, 0); - switch (code) - { - case EQ: - c = 0x4000; - eq = 1; - break; - case NE: - c = 0x4000; - eq = 0; - break; - case GT: - c = 0x4100; - eq = 0; - break; - case LT: - c = 0x100; - eq = 1; - break; - case GE: - c = 0x100; - eq = 0; - break; - case LE: - c = 0x4100; - eq = 1; - break; - default: - abort (); - } - if (!TARGET_PENTIUM || optimize_size) - { - operands[3] = GEN_INT (c >> 8); - output_asm_insn (AS2 (test%B0,%3,%h2), operands); - } - else - { - operands[3] = GEN_INT (c); - output_asm_insn (AS2 (test%L0,%3,%2), operands); - } - return eq ? 
AS1 (je,%l1) : AS1 (jne, %l1); - } - if ((cc_status.flags & CC_NO_OVERFLOW) && (code == LE || code == GT)) - return (char *)0; + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE" + "ix86_expand_branch (UNEQ, operands[0]); DONE;") - return AS1(j%d0,%l1); -}") - -;; Unconditional and other jump instructions +(define_expand "bunge" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE" + "ix86_expand_branch (UNGE, operands[0]); DONE;") -(define_insn "jump" +(define_expand "bungt" [(set (pc) - (label_ref (match_operand 0 "" "")))] - "" - "jmp %l0" - [(set_attr "memory" "none")]) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE" + "ix86_expand_branch (UNGT, operands[0]); DONE;") -(define_insn "indirect_jump" - [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))] - "" - "* -{ - CC_STATUS_INIT; +(define_expand "bunle" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE" + "ix86_expand_branch (UNLE, operands[0]); DONE;") - return AS1 (jmp,%*%0); -}" - [(set_attr "memory" "none")]) +(define_expand "bunlt" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE" + "ix86_expand_branch (UNLT, operands[0]); DONE;") -;; ??? 
could transform while(--i > 0) S; to if (--i > 0) do S; while(--i); -;; if S does not change i +(define_expand "bltgt" + [(set (pc) + (if_then_else (match_dup 1) + (label_ref (match_operand 0 "" "")) + (pc)))] + "TARGET_80387 || TARGET_SSE" + "ix86_expand_branch (LTGT, operands[0]); DONE;") -(define_expand "decrement_and_branch_until_zero" - [(parallel [(set (pc) - (if_then_else (ge (plus:SI (match_operand:SI 0 "general_operand" "") - (const_int -1)) - (const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (match_dup 0) - (plus:SI (match_dup 0) - (const_int -1)))])] +(define_insn "*jcc_1" + [(set (pc) + (if_then_else (match_operator 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] "" - "") + "%+j%C1\t%l0" + [(set_attr "type" "ibr") + (set (attr "prefix_0f") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -128)) + (lt (minus (match_dup 0) (pc)) + (const_int 124))) + (const_int 0) + (const_int 1)))]) + +(define_insn "*jcc_2" + [(set (pc) + (if_then_else (match_operator 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "%+j%c1\t%l0" + [(set_attr "type" "ibr") + (set (attr "prefix_0f") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -128)) + (lt (minus (match_dup 0) (pc)) + (const_int 124))) + (const_int 0) + (const_int 1)))]) + +;; In general it is not safe to assume too much about CCmode registers, +;; so simplify-rtx stops when it sees a second one. 
Under certain +;; conditions this is safe on x86, so help combine not create +;; +;; seta %al +;; testb %al, %al +;; je Lfoo -(define_insn "" +(define_split [(set (pc) - (if_then_else (match_operator 0 "arithmetic_comparison_operator" - [(plus:SI (match_operand:SI 1 "nonimmediate_operand" "+c*r,m") - (match_operand:SI 2 "general_operand" "rmi,ri")) - (const_int 0)]) - (label_ref (match_operand 3 "" "")) - (pc))) - (set (match_dup 1) - (plus:SI (match_dup 1) - (match_dup 2)))] + (if_then_else (ne (match_operator 0 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] "" - "* + [(set (pc) + (if_then_else (match_dup 0) + (label_ref (match_dup 1)) + (pc)))] { - CC_STATUS_INIT; - - if (GET_CODE (operands[1]) == REG && REGNO (operands[2]) == 2 && - operands[2] == constm1_rtx && ix86_cpu == PROCESSOR_K6) - return \"loop %l3\"; + PUT_MODE (operands[0], VOIDmode); +}) + +(define_split + [(set (pc) + (if_then_else (eq (match_operator 0 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) + (if_then_else (match_dup 0) + (label_ref (match_dup 1)) + (pc)))] +{ + rtx new_op0 = copy_rtx (operands[0]); + operands[0] = new_op0; + PUT_MODE (new_op0, VOIDmode); + PUT_CODE (new_op0, REVERSE_CONDITION (GET_CODE (new_op0), + GET_MODE (XEXP (new_op0, 0)))); + + /* Make sure that (a) the CCmode we have for the flags is strong + enough for the reversed compare or (b) we have a valid FP compare. */ + if (! ix86_comparison_operator (new_op0, VOIDmode)) + FAIL; +}) - if (operands[2] == constm1_rtx) - output_asm_insn (AS1 (dec%L1,%1), operands); +;; Define combination compare-and-branch fp compare instructions to use +;; during early optimization. Splitting the operation apart early makes +;; for bad code when we want to reverse the operation. 
- else if (operands[2] == const1_rtx) - output_asm_insn (AS1 (inc%L1,%1), operands); +(define_insn "*fp_jcc_1" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17))] + "TARGET_CMOVE && TARGET_80387 + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") - else - output_asm_insn (AS2 (add%L1,%2,%1), operands); +(define_insn "*fp_jcc_1_sse" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f#x,x#f") + (match_operand 2 "nonimmediate_operand" "f#x,xm#f")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17))] + "TARGET_80387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") - return AS1 (%J0,%l3); -}") +(define_insn "*fp_jcc_1_sse_only" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17))] + "SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") -(define_insn "" +(define_insn "*fp_jcc_2" [(set (pc) - (if_then_else (match_operator 0 "arithmetic_comparison_operator" - [(minus:SI (match_operand:SI 1 "nonimmediate_operand" "+r,m") - (match_operand:SI 2 "general_operand" "rmi,ri")) - (const_int 0)]) - (label_ref (match_operand 3 "" "")) - (pc))) - (set (match_dup 1) - (minus:SI (match_dup 1) - 
(match_dup 2)))] - "" - "* -{ - CC_STATUS_INIT; - if (operands[2] == const1_rtx) - output_asm_insn (AS1 (dec%L1,%1), operands); + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17))] + "TARGET_CMOVE && TARGET_80387 + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") - else if (operands[1] == constm1_rtx) - output_asm_insn (AS1 (inc%L1,%1), operands); +(define_insn "*fp_jcc_2_sse" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f#x,x#f") + (match_operand 2 "nonimmediate_operand" "f#x,xm#f")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17))] + "TARGET_80387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") - else - output_asm_insn (AS2 (sub%L1,%2,%1), operands); +(define_insn "*fp_jcc_2_sse_only" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17))] + "SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") - return AS1 (%J0,%l3); -}") +(define_insn "*fp_jcc_3" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "nonimmediate_operand" "fm")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP 18)) + 
(clobber (reg:CCFP 17)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !ix86_use_fcomi_compare (GET_CODE (operands[0])) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") -(define_insn "" +(define_insn "*fp_jcc_4" [(set (pc) - (if_then_else (ne (match_operand:SI 0 "general_operand" "+g") - (const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (match_dup 0) - (plus:SI (match_dup 0) - (const_int -1)))] - "" - "* -{ - CC_STATUS_INIT; - operands[2] = const1_rtx; - output_asm_insn (AS2 (sub%L0,%2,%0), operands); - return \"jnc %l1\"; -}") + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "nonimmediate_operand" "fm")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !ix86_use_fcomi_compare (GET_CODE (operands[0])) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") -(define_insn "" +(define_insn "*fp_jcc_5" [(set (pc) - (if_then_else (eq (match_operand:SI 0 "general_operand" "+g") - (const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (match_dup 0) - (plus:SI (match_dup 0) - (const_int -1)))] - "" - "* -{ - CC_STATUS_INIT; - operands[2] = const1_rtx; - output_asm_insn (AS2 (sub%L0,%2,%0), operands); - return \"jc %l1\"; -}") + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (label_ref (match_operand 3 "" "")) 
+ (pc))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") -(define_insn "" +(define_insn "*fp_jcc_6" [(set (pc) - (if_then_else (ne (match_operand:SI 0 "general_operand" "+g") - (const_int 1)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (match_dup 0) - (plus:SI (match_dup 0) - (const_int -1)))] - "" - "* -{ - CC_STATUS_INIT; - output_asm_insn (AS1 (dec%L0,%0), operands); - return \"jnz %l1\"; -}") + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") -(define_insn "" +(define_split [(set (pc) - (if_then_else (eq (match_operand:SI 0 "general_operand" "+g") - (const_int 1)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (match_dup 0) - (plus:SI (match_dup 0) - (const_int -1)))] - "" - "* + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "nonimmediate_operand" "")]) + (match_operand 3 "" "") + (match_operand 4 "" ""))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17))] + "reload_completed" + [(const_int 0)] { - CC_STATUS_INIT; - output_asm_insn (AS1 (dec%L0,%0), operands); - return \"jz %l1\"; -}") + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], NULL_RTX); + DONE; +}) -(define_insn "" +(define_split [(set (pc) - (if_then_else (ne (match_operand:SI 0 "general_operand" "+g") - (const_int -1)) - (label_ref 
(match_operand 1 "" "")) - (pc))) - (set (match_dup 0) - (plus:SI (match_dup 0) - (const_int 1)))] - "" - "* + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "nonimmediate_operand" "")]) + (match_operand 3 "" "") + (match_operand 4 "" ""))) + (clobber (reg:CCFP 18)) + (clobber (reg:CCFP 17)) + (clobber (match_scratch:HI 5 "=a"))] + "reload_completed" + [(set (pc) + (if_then_else (match_dup 6) + (match_dup 3) + (match_dup 4)))] { - CC_STATUS_INIT; - output_asm_insn (AS1 (inc%L0,%0), operands); - return \"jnz %l1\"; -}") + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], operands[5]); + DONE; +}) + +;; Unconditional and other jump instructions -(define_insn "" +(define_insn "jump" [(set (pc) - (if_then_else (eq (match_operand:SI 0 "general_operand" "+g") - (const_int -1)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (match_dup 0) - (plus:SI (match_dup 0) - (const_int 1)))] + (label_ref (match_operand 0 "" "")))] "" - "* -{ - CC_STATUS_INIT; - output_asm_insn (AS1 (inc%L0,%0), operands); - return \"jz %l1\"; -}") + "jmp\t%l0" + [(set_attr "type" "ibr")]) -;; Implement switch statements when generating PIC code. Switches are -;; implemented by `tablejump' when not using -fpic. - -;; Emit code here to do the range checking and make the index zero based. 
- -(define_expand "casesi" - [(set (match_dup 5) - (match_operand:SI 0 "general_operand" "")) - (set (match_dup 6) - (minus:SI (match_dup 5) - (match_operand:SI 1 "general_operand" ""))) - (set (cc0) - (compare:CC (match_dup 6) - (match_operand:SI 2 "general_operand" ""))) - (set (pc) - (if_then_else (gtu (cc0) - (const_int 0)) - (label_ref (match_operand 4 "" "")) - (pc))) - (parallel - [(set (pc) - (minus:SI (reg:SI 3) - (mem:SI (plus:SI (mult:SI (match_dup 6) - (const_int 4)) - (label_ref (match_operand 3 "" "")))))) - (clobber (match_scratch:SI 7 ""))])] - "flag_pic" - " -{ - operands[5] = gen_reg_rtx (SImode); - operands[6] = gen_reg_rtx (SImode); - current_function_uses_pic_offset_table = 1; -}") - -;; Implement a casesi insn. - -;; Each entry in the "addr_diff_vec" looks like this as the result of the -;; two rules below: -;; -;; .long _GLOBAL_OFFSET_TABLE_+[.-.L2] -;; -;; 1. An expression involving an external reference may only use the -;; addition operator, and only with an assembly-time constant. -;; The example above satisfies this because ".-.L2" is a constant. -;; -;; 2. The symbol _GLOBAL_OFFSET_TABLE_ is magic, and at link time is -;; given the value of "GOT - .", where GOT is the actual address of -;; the Global Offset Table. Therefore, the .long above actually -;; stores the value "( GOT - . ) + [ . - .L2 ]", or "GOT - .L2". The -;; expression "GOT - .L2" by itself would generate an error from as(1). -;; -;; The pattern below emits code that looks like this: -;; -;; movl %ebx,reg -;; subl TABLE@GOTOFF(%ebx,index,4),reg -;; jmp reg -;; -;; The addr_diff_vec contents may be directly referenced with @GOTOFF, since -;; the addr_diff_vec is known to be part of this module. -;; -;; The subl above calculates "GOT - (( GOT - . ) + [ . - .L2 ])", which -;; evaluates to just ".L2". 
+(define_expand "indirect_jump" + [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))] + "" + "") -(define_insn "" - [(set (pc) - (minus:SI (reg:SI 3) - (mem:SI (plus:SI - (mult:SI (match_operand:SI 0 "register_operand" "r") - (const_int 4)) - (label_ref (match_operand 1 "" "")))))) - (clobber (match_scratch:SI 2 "=&r"))] +(define_insn "*indirect_jump" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))] + "!TARGET_64BIT" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_insn "*indirect_jump_rtx64" + [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm"))] + "TARGET_64BIT" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "rm")) + (use (label_ref (match_operand 1 "" "")))])] "" - "* { - rtx xops[4]; + /* In PIC mode, the table entries are stored GOT-relative. Convert + the relative address to an absolute address. 
*/ + if (flag_pic) + { + if (TARGET_64BIT) + operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], + gen_rtx_LABEL_REF (Pmode, operands[1]), + NULL_RTX, 0, + OPTAB_DIRECT); + else if (HAVE_AS_GOTOFF_IN_DATA) + { + operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], + pic_offset_table_rtx, NULL_RTX, + 1, OPTAB_DIRECT); + current_function_uses_pic_offset_table = 1; + } + else + { + operands[0] = expand_simple_binop (Pmode, MINUS, pic_offset_table_rtx, + operands[0], NULL_RTX, 1, + OPTAB_DIRECT); + current_function_uses_pic_offset_table = 1; + } + } +}) - xops[0] = operands[0]; - xops[1] = operands[1]; - xops[2] = operands[2]; - xops[3] = pic_offset_table_rtx; +(define_insn "*tablejump_1" + [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm")) + (use (label_ref (match_operand 1 "" "")))] + "!TARGET_64BIT" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) - output_asm_insn (AS2 (mov%L2,%3,%2), xops); - output_asm_insn (\"sub%L2 %l1@GOTOFF(%3,%0,4),%2\", xops); - output_asm_insn (AS1 (jmp,%*%2), xops); - ASM_OUTPUT_ALIGN (asm_out_file, i386_align_jumps); - RET; +(define_insn "*tablejump_1_rtx64" + [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_64BIT" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +;; Loop instruction +;; +;; This is all complicated by the fact that since this is a jump insn +;; we must handle our own reloads. + +(define_expand "doloop_end" + [(use (match_operand 0 "" "")) ; loop pseudo + (use (match_operand 1 "" "")) ; iterations; zero if unknown + (use (match_operand 2 "" "")) ; max iterations + (use (match_operand 3 "" "")) ; loop level + (use (match_operand 4 "" ""))] ; label + "!TARGET_64BIT && TARGET_USE_LOOP" + " +{ + /* Only use cloop on innermost loops. 
*/ + if (INTVAL (operands[3]) > 1) + FAIL; + if (GET_MODE (operands[0]) != SImode) + FAIL; + emit_jump_insn (gen_doloop_end_internal (operands[4], operands[0], + operands[0])); + DONE; }") -(define_insn "tablejump" - [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm")) - (use (label_ref (match_operand 1 "" "")))] - "" - "* +(define_insn "doloop_end_internal" + [(set (pc) + (if_then_else (ne (match_operand:SI 1 "register_operand" "c,?*r,?*r") + (const_int 1)) + (label_ref (match_operand 0 "" "")) + (pc))) + (set (match_operand:SI 2 "register_operand" "=1,1,*m*r") + (plus:SI (match_dup 1) + (const_int -1))) + (clobber (match_scratch:SI 3 "=X,X,r")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_USE_LOOP" { - CC_STATUS_INIT; + if (which_alternative != 0) + return "#"; + if (get_attr_length (insn) == 2) + return "%+loop\t%l0"; + else + return "dec{l}\t%1\;%+jne\t%l0"; +} + [(set_attr "ppro_uops" "many") + (set (attr "type") + (if_then_else (and (eq_attr "alternative" "0") + (and (ge (minus (match_dup 0) (pc)) + (const_int -128)) + (lt (minus (match_dup 0) (pc)) + (const_int 124)))) + (const_string "ibr") + (const_string "multi")))]) - return AS1 (jmp,%*%0); -}") +(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 1 "register_operand" "") + (const_int 1)) + (match_operand 0 "" "") + (pc))) + (set (match_dup 1) + (plus:SI (match_dup 1) + (const_int -1))) + (clobber (match_scratch:SI 2 "")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_USE_LOOP + && reload_completed + && REGNO (operands[1]) != 2" + [(parallel [(set (reg:CCZ 17) + (compare:CCZ (plus:SI (match_dup 1) (const_int -1)) + (const_int 0))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int -1)))]) + (set (pc) (if_then_else (ne (reg:CCZ 17) (const_int 0)) + (match_dup 0) + (pc)))] + "") + +(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 1 "register_operand" "") + (const_int 1)) + (match_operand 0 "" "") + (pc))) + (set (match_operand:SI 2 "nonimmediate_operand" 
"") + (plus:SI (match_dup 1) + (const_int -1))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_USE_LOOP + && reload_completed + && (! REG_P (operands[2]) + || ! rtx_equal_p (operands[1], operands[2]))" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (reg:CCZ 17) + (compare:CCZ (plus:SI (match_dup 3) (const_int -1)) + (const_int 0))) + (set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))]) + (set (match_dup 2) (match_dup 3)) + (set (pc) (if_then_else (ne (reg:CCZ 17) (const_int 0)) + (match_dup 0) + (pc)))] + "") -;; Call insns. +;; Convert setcc + movzbl to xor + setcc if operands don't overlap. + +(define_peephole2 + [(set (reg 17) (match_operand 0 "" "")) + (set (match_operand:QI 1 "register_operand" "") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg 17) (const_int 0)])) + (set (match_operand 3 "q_regs_operand" "") + (zero_extend (match_dup 1)))] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17); + operands[5] = gen_rtx_REG (QImode, REGNO (operands[3])); + ix86_expand_clear (operands[3]); +}) + +;; Similar, but match zero_extendhisi2_and, which adds a clobber. + +(define_peephole2 + [(set (reg 17) (match_operand 0 "" "")) + (set (match_operand:QI 1 "register_operand" "") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg 17) (const_int 0)])) + (parallel [(set (match_operand 3 "q_regs_operand" "") + (zero_extend (match_dup 1))) + (clobber (reg:CC 17))])] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! 
reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17); + operands[5] = gen_rtx_REG (QImode, REGNO (operands[3])); + ix86_expand_clear (operands[3]); +}) + +;; Call instructions. -;; If generating PIC code, the predicate indirect_operand will fail -;; for operands[0] containing symbolic references on all of the named -;; call* patterns. Each named pattern is followed by an unnamed pattern -;; that matches any call to a symbolic CONST (ie, a symbol_ref). The -;; unnamed patterns are only used while generating PIC code, because -;; otherwise the named patterns match. +;; The predicates normally associated with named expanders are not properly +;; checked for calls. This is a bug in the generic code, but it isn't that +;; easy to fix. Ignore it for now and be prepared to fix things up. ;; Call subroutine returning no value. (define_expand "call_pop" - [(parallel [(call (match_operand:QI 0 "indirect_operand" "") - (match_operand:SI 1 "general_operand" "")) + [(parallel [(call (match_operand:QI 0 "" "") + (match_operand:SI 1 "" "")) (set (reg:SI 7) (plus:SI (reg:SI 7) - (match_operand:SI 3 "immediate_operand" "")))])] - "" - " + (match_operand:SI 3 "" "")))])] + "!TARGET_64BIT" { - rtx addr; - if (operands[3] == const0_rtx) { - emit_insn (gen_call (operands[0], operands[1])); + emit_insn (gen_call (operands[0], operands[1], constm1_rtx)); DONE; } - - if (flag_pic) + /* Static functions and indirect calls don't need + current_function_uses_pic_offset_table. */ + if (flag_pic + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF + && ! SYMBOL_REF_FLAG (XEXP (operands[0], 0))) current_function_uses_pic_offset_table = 1; - - /* With half-pic, force the address into a register. 
*/ - addr = XEXP (operands[0], 0); - if (GET_CODE (addr) != REG && HALF_PIC_P () && !CONSTANT_ADDRESS_P (addr)) - XEXP (operands[0], 0) = force_reg (Pmode, addr); - - if (! expander_call_insn_operand (operands[0], QImode)) - operands[0] - = change_address (operands[0], VOIDmode, - copy_to_mode_reg (Pmode, XEXP (operands[0], 0))); -}") - -(define_insn "" - [(call (match_operand:QI 0 "call_insn_operand" "m") - (match_operand:SI 1 "general_operand" "g")) + if (! call_insn_operand (XEXP (operands[0], 0), Pmode)) + XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + if (TARGET_64BIT) + abort(); +}) + +(define_insn "*call_pop_0" + [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) + (match_operand:SI 1 "" "")) (set (reg:SI 7) (plus:SI (reg:SI 7) - (match_operand:SI 3 "immediate_operand" "i")))] - "" - "* + (match_operand:SI 2 "immediate_operand" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_pop_1" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm")) + (match_operand:SI 1 "" "")) + (set (reg:SI 7) (plus:SI (reg:SI 7) + (match_operand:SI 2 "immediate_operand" "i")))] + "!TARGET_64BIT" { - if (GET_CODE (operands[0]) == MEM - && ! 
CONSTANT_ADDRESS_P (XEXP (operands[0], 0))) + if (constant_call_address_operand (operands[0], Pmode)) { - operands[0] = XEXP (operands[0], 0); - return AS1 (call,%*%0); + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; } + if (SIBLING_CALL_P (insn)) + return "jmp\t%A0"; else - return AS1 (call,%P0); -}") - -(define_insn "" - [(call (mem:QI (match_operand:SI 0 "symbolic_operand" "")) - (match_operand:SI 1 "general_operand" "g")) - (set (reg:SI 7) (plus:SI (reg:SI 7) - (match_operand:SI 3 "immediate_operand" "i")))] - "!HALF_PIC_P ()" - "call %P0") + return "call\t%A0"; +} + [(set_attr "type" "call")]) (define_expand "call" - [(call (match_operand:QI 0 "indirect_operand" "") - (match_operand:SI 1 "general_operand" ""))] + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] ;; Operand 1 not used on the i386. "" - " { - rtx addr; - - if (flag_pic) + rtx insn; + /* Static functions and indirect calls don't need + current_function_uses_pic_offset_table. */ + if (flag_pic + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF + && ! SYMBOL_REF_FLAG (XEXP (operands[0], 0))) current_function_uses_pic_offset_table = 1; - /* With half-pic, force the address into a register. */ - addr = XEXP (operands[0], 0); - if (GET_CODE (addr) != REG && HALF_PIC_P () && !CONSTANT_ADDRESS_P (addr)) - XEXP (operands[0], 0) = force_reg (Pmode, addr); + if (! call_insn_operand (XEXP (operands[0], 0), Pmode)) + XEXP (operands[0], 0) = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); + if (TARGET_64BIT && INTVAL (operands[2]) >= 0) + { + rtx reg = gen_rtx_REG (QImode, 0); + emit_move_insn (reg, operands[2]); + insn = emit_call_insn (gen_call_exp (operands[0], operands[1])); + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); + DONE; + } + insn = emit_call_insn (gen_call_exp (operands[0], operands[1])); + DONE; +}) - if (! 
expander_call_insn_operand (operands[0], QImode)) - operands[0] - = change_address (operands[0], VOIDmode, - copy_to_mode_reg (Pmode, XEXP (operands[0], 0))); -}") +(define_expand "call_exp" + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" ""))] + "" + "") -(define_insn "" - [(call (match_operand:QI 0 "call_insn_operand" "m") - (match_operand:SI 1 "general_operand" "g"))] - ;; Operand 1 not used on the i386. +(define_insn "*call_0" + [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) + (match_operand 1 "" ""))] "" - "* { - if (GET_CODE (operands[0]) == MEM - && ! CONSTANT_ADDRESS_P (XEXP (operands[0], 0))) + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_1" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" ""))] + "!TARGET_64BIT" +{ + if (constant_call_address_operand (operands[0], QImode)) { - operands[0] = XEXP (operands[0], 0); - return AS1 (call,%*%0); + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; } + if (SIBLING_CALL_P (insn)) + return "jmp\t%A0"; else - return AS1 (call,%P0); -}") - -(define_insn "" - [(call (mem:QI (match_operand:SI 0 "symbolic_operand" "")) - (match_operand:SI 1 "general_operand" "g"))] - ;; Operand 1 not used on the i386. - "!HALF_PIC_P ()" - "call %P0") + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_1_rex64" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" ""))] + "TARGET_64BIT" +{ + if (constant_call_address_operand (operands[0], QImode)) + { + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; + } + if (SIBLING_CALL_P (insn)) + return "jmp\t%A0"; + else + return "call\t%A0"; +} + [(set_attr "type" "call")]) ;; Call subroutine, returning value in operand 0 ;; (which must be a hard register). 
(define_expand "call_value_pop" [(parallel [(set (match_operand 0 "" "") - (call (match_operand:QI 1 "indirect_operand" "") - (match_operand:SI 2 "general_operand" ""))) + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) (set (reg:SI 7) (plus:SI (reg:SI 7) - (match_operand:SI 4 "immediate_operand" "")))])] - "" - " + (match_operand:SI 4 "" "")))])] + "!TARGET_64BIT" { - rtx addr; - if (operands[4] == const0_rtx) { - emit_insn (gen_call_value (operands[0], operands[1], operands[2])); + emit_insn (gen_call_value (operands[0], operands[1], operands[2], + constm1_rtx)); DONE; } - - if (flag_pic) + /* Static functions and indirect calls don't need + current_function_uses_pic_offset_table. */ + if (flag_pic + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0))) current_function_uses_pic_offset_table = 1; - - /* With half-pic, force the address into a register. */ - addr = XEXP (operands[1], 0); - if (GET_CODE (addr) != REG && HALF_PIC_P () && !CONSTANT_ADDRESS_P (addr)) - XEXP (operands[1], 0) = force_reg (Pmode, addr); - - if (! expander_call_insn_operand (operands[1], QImode)) - operands[1] - = change_address (operands[1], VOIDmode, - copy_to_mode_reg (Pmode, XEXP (operands[1], 0))); -}") - -(define_insn "" - [(set (match_operand 0 "" "=rf") - (call (match_operand:QI 1 "call_insn_operand" "m") - (match_operand:SI 2 "general_operand" "g"))) - (set (reg:SI 7) (plus:SI (reg:SI 7) - (match_operand:SI 4 "immediate_operand" "i")))] - "" - "* -{ - if (GET_CODE (operands[1]) == MEM - && ! 
CONSTANT_ADDRESS_P (XEXP (operands[1], 0))) - { - operands[1] = XEXP (operands[1], 0); - output_asm_insn (AS1 (call,%*%1), operands); - } - else - output_asm_insn (AS1 (call,%P1), operands); - - RET; -}") - -(define_insn "" - [(set (match_operand 0 "" "=rf") - (call (mem:QI (match_operand:SI 1 "symbolic_operand" "")) - (match_operand:SI 2 "general_operand" "g"))) - (set (reg:SI 7) (plus:SI (reg:SI 7) - (match_operand:SI 4 "immediate_operand" "i")))] - "!HALF_PIC_P ()" - "call %P1") + if (! call_insn_operand (XEXP (operands[1], 0), Pmode)) + XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); +}) (define_expand "call_value" [(set (match_operand 0 "" "") - (call (match_operand:QI 1 "indirect_operand" "") - (match_operand:SI 2 "general_operand" "")))] + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] ;; Operand 2 not used on the i386. "" - " { - rtx addr; - - if (flag_pic) + rtx insn; + /* Static functions and indirect calls don't need + current_function_uses_pic_offset_table. */ + if (flag_pic + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && ! SYMBOL_REF_FLAG (XEXP (operands[1], 0))) current_function_uses_pic_offset_table = 1; - - /* With half-pic, force the address into a register. */ - addr = XEXP (operands[1], 0); - if (GET_CODE (addr) != REG && HALF_PIC_P () && !CONSTANT_ADDRESS_P (addr)) - XEXP (operands[1], 0) = force_reg (Pmode, addr); - - if (! expander_call_insn_operand (operands[1], QImode)) - operands[1] - = change_address (operands[1], VOIDmode, - copy_to_mode_reg (Pmode, XEXP (operands[1], 0))); -}") - -(define_insn "" - [(set (match_operand 0 "" "=rf") - (call (match_operand:QI 1 "call_insn_operand" "m") - (match_operand:SI 2 "general_operand" "g")))] - ;; Operand 2 not used on the i386. - "" - "* -{ - if (GET_CODE (operands[1]) == MEM - && ! CONSTANT_ADDRESS_P (XEXP (operands[1], 0))) + if (! 
call_insn_operand (XEXP (operands[1], 0), Pmode)) + XEXP (operands[1], 0) = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + if (TARGET_64BIT && INTVAL (operands[3]) >= 0) { - operands[1] = XEXP (operands[1], 0); - output_asm_insn (AS1 (call,%*%1), operands); + rtx reg = gen_rtx_REG (QImode, 0); + emit_move_insn (reg, operands[3]); + insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1], + operands[2])); + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); + DONE; } - else - output_asm_insn (AS1 (call,%P1), operands); - - RET; -}") + insn = emit_call_insn (gen_call_value_exp (operands[0], operands[1], + operands[2])); + DONE; +}) -(define_insn "" - [(set (match_operand 0 "" "=rf") - (call (mem:QI (match_operand:SI 1 "symbolic_operand" "")) - (match_operand:SI 2 "general_operand" "g")))] - ;; Operand 2 not used on the i386. - "!HALF_PIC_P ()" - "call %P1") +(define_expand "call_value_exp" + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" "")))] + "" + "") ;; Call subroutine returning any type. @@ -6890,7 +13533,6 @@ byte_xor_operation: (match_operand 1 "" "") (match_operand 2 "" "")])] "" - " { int i; @@ -6901,8 +13543,10 @@ byte_xor_operation: emit_call_insn (TARGET_80387 ? gen_call_value (gen_rtx_REG (XCmode, FIRST_FLOAT_REG), - operands[0], const0_rtx) - : gen_call (operands[0], const0_rtx)); + operands[0], const0_rtx, + GEN_INT (SSE_REGPARM_MAX - 1)) + : gen_call (operands[0], const0_rtx, + GEN_INT (SSE_REGPARM_MAX - 1))); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -6917,7 +13561,9 @@ byte_xor_operation: emit_insn (gen_blockage ()); DONE; -}") +}) + +;; Prologue and epilogue instructions ;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and ;; all of memory. This blocks insns from being moved across this point. 
@@ -6926,827 +13572,2252 @@ byte_xor_operation: [(unspec_volatile [(const_int 0)] 0)] "" "" - [(set_attr "memory" "none")]) + [(set_attr "length" "0")]) ;; Insn emitted into the body of a function to return from a function. ;; This is only done if the function's epilogue is known to be simple. -;; See comments for simple_386_epilogue in i386.c. +;; See comments for ix86_can_use_return_insn_p in i386.c. (define_expand "return" [(return)] "ix86_can_use_return_insn_p ()" - "") +{ + if (current_function_pops_args) + { + rtx popc = GEN_INT (current_function_pops_args); + emit_jump_insn (gen_return_pop_internal (popc)); + DONE; + } +}) (define_insn "return_internal" [(return)] "reload_completed" "ret" - [(set_attr "memory" "none")]) + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) (define_insn "return_pop_internal" [(return) (use (match_operand:SI 0 "const_int_operand" ""))] "reload_completed" - "ret %0" - [(set_attr "memory" "none")]) + "ret\t%0" + [(set_attr "length" "3") + (set_attr "length_immediate" "2") + (set_attr "modrm" "0")]) + +(define_insn "return_indirect_internal" + [(return) + (use (match_operand:SI 0 "register_operand" "r"))] + "reload_completed" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) (define_insn "nop" [(const_int 0)] "" "nop" - [(set_attr "memory" "none")]) + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0") + (set_attr "ppro_uops" "one")]) (define_expand "prologue" [(const_int 1)] "" - " -{ - ix86_expand_prologue (); - DONE; -}") - -;; The use of UNSPEC here is currently not necessary - a simple SET of ebp -;; to itself would be enough. But this way we are safe even if some optimizer -;; becomes too clever in the future. 
-(define_insn "prologue_set_stack_ptr" - [(set (reg:SI 7) - (minus:SI (reg:SI 7) (match_operand:SI 0 "immediate_operand" "i"))) - (set (reg:SI 6) (unspec:SI [(reg:SI 6)] 4))] - "" - "* -{ - rtx xops [2]; - - xops[0] = operands[0]; - xops[1] = stack_pointer_rtx; - output_asm_insn (AS2 (sub%L1,%0,%1), xops); - RET; -}" - [(set_attr "memory" "none")]) + "ix86_expand_prologue (); DONE;") (define_insn "prologue_set_got" - [(set (match_operand:SI 0 "" "") - (unspec_volatile + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(plus:SI (match_dup 0) (plus:SI (match_operand:SI 1 "symbolic_operand" "") - (minus:SI (pc) (match_operand 2 "" ""))))] 1))] - "" - "* + (minus:SI (pc) (match_operand 2 "" ""))))] 1)) + (clobber (reg:CC 17))] + "!TARGET_64BIT" { - char buffer[64]; - + if (GET_CODE (operands[2]) == LABEL_REF) + operands[2] = XEXP (operands[2], 0); if (TARGET_DEEP_BRANCH_PREDICTION) - { - sprintf (buffer, \"addl %s,%%0\", XSTR (operands[1], 0)); - output_asm_insn (buffer, operands); - } + return "add{l}\t{%1, %0|%0, %1}"; else - { - sprintf (buffer, \"addl %s+[.-%%X2],%%0\", XSTR (operands[1], 0)); - output_asm_insn (buffer, operands); - } - RET; -}") + return "add{l}\t{%1+[.-%X2], %0|%0, %a1+(.-%X2)}"; +} + [(set_attr "type" "alu") + ; Since this insn may have two constant operands, we must set the + ; length manually. + (set_attr "length_immediate" "4") + (set_attr "mode" "SI")]) (define_insn "prologue_get_pc" - [(set (match_operand:SI 0 "" "") - (unspec_volatile [(plus:SI (pc) (match_operand 1 "" ""))] 2))] - "" - "* + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(plus:SI (pc) (match_operand 1 "" ""))] 2))] + "!TARGET_64BIT" { - output_asm_insn (AS1 (call,%X1), operands); - if (! TARGET_DEEP_BRANCH_PREDICTION) + if (GET_CODE (operands[1]) == LABEL_REF) + operands[1] = XEXP (operands[1], 0); + output_asm_insn ("call\t%X1", operands); + if (! 
TARGET_DEEP_BRANCH_PREDICTION) { - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", CODE_LABEL_NUMBER (operands[1])); - } - RET; -}" - [(set_attr "memory" "none")]) - -(define_insn "prologue_get_pc_and_set_got" - [(unspec_volatile [(match_operand:SI 0 "" "")] 3)] - "" - "* -{ - operands[1] = gen_label_rtx (); - output_asm_insn (AS1 (call,%X1), operands); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", - CODE_LABEL_NUMBER (operands[1])); - output_asm_insn (AS1 (pop%L0,%0), operands); - output_asm_insn (\"addl $%__GLOBAL_OFFSET_TABLE_+[.-%X1],%0\", operands); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (operands[1])); + } RET; -}" - [(set_attr "memory" "none")]) +} + [(set_attr "type" "multi")]) (define_expand "epilogue" [(const_int 1)] "" - " -{ - ix86_expand_epilogue (); - DONE; -}") - -(define_insn "epilogue_set_stack_ptr" - [(set (reg:SI 7) (reg:SI 6)) - (clobber (reg:SI 6))] - "" - "* -{ - rtx xops [2]; + "ix86_expand_epilogue (1); DONE;") - xops[0] = frame_pointer_rtx; - xops[1] = stack_pointer_rtx; - output_asm_insn (AS2 (mov%L0,%0,%1), xops); - RET; -}" - [(set_attr "memory" "none")]) - -(define_insn "leave" - [(const_int 2) - (clobber (reg:SI 6)) - (clobber (reg:SI 7))] +(define_expand "sibcall_epilogue" + [(const_int 1)] "" - "leave" - [(set_attr "memory" "none")]) + "ix86_expand_epilogue (0); DONE;") -(define_insn "pop" - [(set (match_operand:SI 0 "register_operand" "r") - (mem:SI (reg:SI 7))) - (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))] +(define_expand "eh_return" + [(use (match_operand 0 "register_operand" "")) + (use (match_operand 1 "register_operand" ""))] "" - "* { - output_asm_insn (AS1 (pop%L0,%P0), operands); - RET; -}" - [(set_attr "memory" "load")]) - -(define_expand "movstrsi" - [(parallel [(set (match_operand:BLK 0 "memory_operand" "") - (match_operand:BLK 1 "memory_operand" "")) - (use (match_operand:SI 2 "const_int_operand" "")) - (use (match_operand:SI 3 "const_int_operand" "")) - (clobber 
(match_scratch:SI 4 "")) - (clobber (match_dup 5)) - (clobber (match_dup 6))])] - "" - " -{ - rtx addr0, addr1; - - if (GET_CODE (operands[2]) != CONST_INT) - FAIL; - - addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); - addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); - - operands[5] = addr0; - operands[6] = addr1; + rtx tmp, sa = operands[0], ra = operands[1]; + + /* Tricky bit: we write the address of the handler to which we will + be returning into someone else's stack frame, one word below the + stack address we wish to restore. */ + tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa); + tmp = plus_constant (tmp, -UNITS_PER_WORD); + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (tmp, ra); + + if (Pmode == SImode) + emit_insn (gen_eh_return_si (sa)); + else + emit_insn (gen_eh_return_di (sa)); + emit_barrier (); + DONE; +}) - operands[0] = change_address (operands[0], VOIDmode, addr0); - operands[1] = change_address (operands[1], VOIDmode, addr1); -}") +(define_insn_and_split "eh_return_si" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")] 13)] + "!TARGET_64BIT" + "#" + "reload_completed" + [(const_int 1)] + "ix86_expand_epilogue (2); DONE;") -;; It might seem that operands 0 & 1 could use predicate register_operand. -;; But strength reduction might offset the MEM expression. So we let -;; reload put the address into %edi & %esi. 
+(define_insn_and_split "eh_return_di" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "c")] 13)] + "TARGET_64BIT" + "#" + "reload_completed" + [(const_int 1)] + "ix86_expand_epilogue (2); DONE;") -(define_insn "" - [(set (mem:BLK (match_operand:SI 0 "address_operand" "D")) - (mem:BLK (match_operand:SI 1 "address_operand" "S"))) - (use (match_operand:SI 2 "const_int_operand" "n")) - (use (match_operand:SI 3 "immediate_operand" "i")) - (clobber (match_scratch:SI 4 "=&c")) - (clobber (match_dup 0)) - (clobber (match_dup 1))] +(define_insn "leave" + [(set (reg:SI 7) (plus:SI (reg:SI 6) (const_int 4))) + (set (reg:SI 6) (mem:SI (reg:SI 6))) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "leave" + [(set_attr "length_immediate" "0") + (set_attr "length" "1") + (set_attr "modrm" "0") + (set_attr "modrm" "0") + (set_attr "athlon_decode" "vector") + (set_attr "ppro_uops" "few")]) + +(define_insn "leave_rex64" + [(set (reg:DI 7) (plus:DI (reg:DI 6) (const_int 8))) + (set (reg:DI 6) (mem:DI (reg:DI 6))) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "leave" + [(set_attr "length_immediate" "0") + (set_attr "length" "1") + (set_attr "modrm" "0") + (set_attr "modrm" "0") + (set_attr "athlon_decode" "vector") + (set_attr "ppro_uops" "few")]) + +(define_expand "ffssi2" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (ffs:SI (match_operand:SI 1 "general_operand" "")))] "" - "* { - rtx xops[2]; + rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode); + rtx in = operands[1]; - output_asm_insn (\"cld\", operands); - if (GET_CODE (operands[2]) == CONST_INT) + if (TARGET_CMOVE) { - if (INTVAL (operands[2]) & ~0x03) - { - xops[0] = GEN_INT ((INTVAL (operands[2]) >> 2) & 0x3fffffff); - xops[1] = operands[4]; - - output_asm_insn (AS2 (mov%L1,%0,%1), xops); -#ifdef INTEL_SYNTAX - output_asm_insn (\"rep movsd\", xops); -#else - output_asm_insn (\"rep\;movsl\", xops); -#endif - } - if (INTVAL (operands[2]) & 0x02) - output_asm_insn (\"movsw\", operands); - 
if (INTVAL (operands[2]) & 0x01) - output_asm_insn (\"movsb\", operands); + emit_move_insn (tmp, constm1_rtx); + emit_insn (gen_ffssi_1 (out, in)); + emit_insn (gen_rtx_SET (VOIDmode, out, + gen_rtx_IF_THEN_ELSE (SImode, + gen_rtx_EQ (VOIDmode, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx), + tmp, + out))); + emit_insn (gen_addsi3 (out, out, const1_rtx)); + emit_move_insn (operands[0], out); } - else - abort (); - RET; -}") -(define_expand "clrstrsi" - [(set (match_dup 3) (const_int 0)) - (parallel [(set (match_operand:BLK 0 "memory_operand" "") - (const_int 0)) - (use (match_operand:SI 1 "const_int_operand" "")) - (use (match_operand:SI 2 "const_int_operand" "")) - (use (match_dup 3)) - (clobber (match_scratch:SI 4 "")) - (clobber (match_dup 5))])] - "" - " -{ - rtx addr0; - - if (GET_CODE (operands[1]) != CONST_INT) - FAIL; - - addr0 = copy_to_mode_reg (Pmode, XEXP (operands[0], 0)); - - operands[3] = gen_reg_rtx (SImode); - operands[5] = addr0; - - operands[0] = gen_rtx_MEM (BLKmode, addr0); -}") - -;; It might seem that operand 0 could use predicate register_operand. -;; But strength reduction might offset the MEM expression. So we let -;; reload put the address into %edi. - -(define_insn "*bzero" - [(set (mem:BLK (match_operand:SI 0 "address_operand" "D")) - (const_int 0)) - (use (match_operand:SI 1 "const_int_operand" "n")) - (use (match_operand:SI 2 "immediate_operand" "i")) - (use (match_operand:SI 3 "register_operand" "a")) - (clobber (match_scratch:SI 4 "=&c")) - (clobber (match_dup 0))] - "" - "* -{ - rtx xops[2]; - - output_asm_insn (\"cld\", operands); - if (GET_CODE (operands[1]) == CONST_INT) + /* Pentium bsf instruction is extremly slow. 
The following code is + recommended by the Intel Optimizing Manual as a reasonable replacement: + TEST EAX,EAX + JZ SHORT BS2 + XOR ECX,ECX + MOV DWORD PTR [TEMP+4],ECX + SUB ECX,EAX + AND EAX,ECX + MOV DWORD PTR [TEMP],EAX + FILD QWORD PTR [TEMP] + FSTP QWORD PTR [TEMP] + WAIT ; WAIT only needed for compatibility with + ; earlier processors + MOV ECX, DWORD PTR [TEMP+4] + SHR ECX,20 + SUB ECX,3FFH + TEST EAX,EAX ; clear zero flag + BS2: + Following piece of code expand ffs to similar beast. + */ + + else if (TARGET_PENTIUM && !optimize_size && TARGET_80387) { - unsigned int count = INTVAL (operands[1]) & 0xffffffff; - if (count & ~0x03) - { - xops[0] = GEN_INT (count / 4); - xops[1] = operands[4]; - - /* K6: stos takes 1 cycle, rep stos takes 8 + %ecx cycles. - 80386: 4/5+5n (+2 for set of ecx) - 80486: 5/7+5n (+1 for set of ecx) - */ - if (count / 4 < ((int) ix86_cpu < (int)PROCESSOR_PENTIUM ? 4 : 6)) - { - do -#ifdef INTEL_SYNTAX - output_asm_insn (\"stosd\", xops); -#else - output_asm_insn (\"stosl\", xops); -#endif - while ((count -= 4) > 3); - } - else - { - output_asm_insn (AS2 (mov%L1,%0,%1), xops); -#ifdef INTEL_SYNTAX - output_asm_insn (\"rep stosd\", xops); -#else - output_asm_insn (\"rep\;stosl\", xops); -#endif - } - } - if (INTVAL (operands[1]) & 0x02) - output_asm_insn (\"stosw\", operands); - if (INTVAL (operands[1]) & 0x01) - output_asm_insn (\"stosb\", operands); + rtx label = gen_label_rtx (); + rtx lo, hi; + rtx mem = assign_386_stack_local (DImode, 0); + rtx fptmp = gen_reg_rtx (DFmode); + split_di (&mem, 1, &lo, &hi); + + emit_move_insn (out, const0_rtx); + + emit_cmp_and_jump_insns (in, const0_rtx, EQ, 0, SImode, 1, label); + + emit_move_insn (hi, out); + emit_insn (gen_subsi3 (out, out, in)); + emit_insn (gen_andsi3 (out, out, in)); + emit_move_insn (lo, out); + emit_insn (gen_floatdidf2 (fptmp,mem)); + emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp); + emit_move_insn (out, hi); + emit_insn (gen_lshrsi3 (out, out, GEN_INT (20))); 
+ emit_insn (gen_subsi3 (out, out, GEN_INT (0x3ff - 1))); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operands[0], out); } else - abort (); - RET; -}") - -(define_expand "cmpstrsi" - [(parallel [(set (match_operand:SI 0 "general_operand" "") - (compare:SI (match_operand:BLK 1 "general_operand" "") - (match_operand:BLK 2 "general_operand" ""))) - (use (match_operand:SI 3 "general_operand" "")) - (use (match_operand:SI 4 "immediate_operand" "")) - (clobber (match_dup 5)) - (clobber (match_dup 6)) - (clobber (match_dup 3))])] - "" - " -{ - rtx addr1, addr2; - - addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); - addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0)); - operands[3] = copy_to_mode_reg (SImode, operands[3]); - - operands[5] = addr1; - operands[6] = addr2; - - operands[1] = gen_rtx_MEM (BLKmode, addr1); - operands[2] = gen_rtx_MEM (BLKmode, addr2); - -}") - -;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is -;; zero. Emit extra code to make sure that a zero-length compare is EQ. - -;; It might seem that operands 0 & 1 could use predicate register_operand. -;; But strength reduction might offset the MEM expression. So we let -;; reload put the address into %edi & %esi. - -;; ??? Most comparisons have a constant length, and it's therefore -;; possible to know that the length is non-zero, and to avoid the extra -;; code to handle zero-length compares. 
- -(define_insn "" - [(set (match_operand:SI 0 "register_operand" "=&r") - (compare:SI (mem:BLK (match_operand:SI 1 "address_operand" "S")) - (mem:BLK (match_operand:SI 2 "address_operand" "D")))) - (use (match_operand:SI 3 "register_operand" "c")) - (use (match_operand:SI 4 "immediate_operand" "i")) - (clobber (match_dup 1)) - (clobber (match_dup 2)) - (clobber (match_dup 3))] - "" - "* -{ - rtx xops[2], label; - - label = gen_label_rtx (); - - output_asm_insn (\"cld\", operands); - output_asm_insn (AS2 (xor%L0,%0,%0), operands); - output_asm_insn (\"repz\;cmps%B2\", operands); - output_asm_insn (\"je %l0\", &label); - - xops[0] = operands[0]; - xops[1] = const1_rtx; - output_asm_insn (AS2 (sbb%L0,%0,%0), xops); - if (QI_REG_P (xops[0])) - output_asm_insn (AS2 (or%B0,%1,%b0), xops); - else - output_asm_insn (AS2 (or%L0,%1,%0), xops); - - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", CODE_LABEL_NUMBER (label)); - RET; -}") + { + emit_move_insn (tmp, const0_rtx); + emit_insn (gen_ffssi_1 (out, in)); + emit_insn (gen_rtx_SET (VOIDmode, + gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (QImode, tmp)), + gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx))); + emit_insn (gen_negsi2 (tmp, tmp)); + emit_insn (gen_iorsi3 (out, out, tmp)); + emit_insn (gen_addsi3 (out, out, const1_rtx)); + emit_move_insn (operands[0], out); + } + DONE; +}) -(define_insn "" - [(set (cc0) - (compare:SI (mem:BLK (match_operand:SI 0 "address_operand" "S")) - (mem:BLK (match_operand:SI 1 "address_operand" "D")))) - (use (match_operand:SI 2 "register_operand" "c")) - (use (match_operand:SI 3 "immediate_operand" "i")) - (clobber (match_dup 0)) - (clobber (match_dup 1)) - (clobber (match_dup 2))] +(define_insn "ffssi_1" + [(set (reg:CCZ 17) + (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_dup 1)] 5))] "" - "* -{ - rtx xops[2]; - - cc_status.flags |= CC_NOT_SIGNED; + 
"bsf{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "ppro_uops" "few")]) - xops[0] = gen_rtx_REG (QImode, 0); - xops[1] = CONST0_RTX (QImode); +;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger +;; and slower than the two-byte movzx insn needed to do the work in SImode. + +;; These patterns match the binary 387 instructions for addM3, subM3, +;; mulM3 and divM3. There are three patterns for each of DFmode and +;; SFmode. The first is the normal insn, the second the same insn but +;; with one operand a conversion, and the third the same insn but with +;; the other operand a conversion. The conversion may be SFmode or +;; SImode if the target mode DFmode, but only SImode if the target mode +;; is SFmode. - output_asm_insn (\"cld\", operands); - output_asm_insn (AS2 (test%B0,%1,%0), xops); - return \"repz\;cmps%B2\"; -}") +;; Gcc is slightly more smart about handling normal two address instructions +;; so use special patterns for add and mull. +(define_insn "*fop_sf_comm_nosse" + [(set (match_operand:SF 0 "register_operand" "=f") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "register_operand" "%0") + (match_operand:SF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 && !TARGET_SSE_MATH + && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_sf_comm" + [(set (match_operand:SF 0 "register_operand" "=f#x,x#f") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "register_operand" "%0,0") + (match_operand:SF 2 "nonimmediate_operand" "fm#x,xm#f")]))] + "TARGET_80387 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 + && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + 
(const_string "sse") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_sf_comm_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "register_operand" "%0") + (match_operand:SF 2 "nonimmediate_operand" "xm")]))] + "TARGET_SSE_MATH && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) - -;; Note, you cannot optimize away the branch following the bsfl by assuming -;; that the destination is not modified if the input is 0, since not all -;; x86 implementations do this. +(define_insn "*fop_df_comm_nosse" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "%0") + (match_operand:DF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 && (!TARGET_SSE2 || !TARGET_SSE_MATH) + && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "DF")]) + +(define_insn "*fop_df_comm" + [(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "%0,0") + (match_operand:DF 2 "nonimmediate_operand" "fm#Y,Ym#f")]))] + "TARGET_80387 && TARGET_SSE_MATH && TARGET_SSE2 && TARGET_MIX_SSE_I387 + && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + (const_string "sse") + (if_then_else (match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "mode" "DF")]) + +(define_insn "*fop_df_comm_sse" + [(set 
(match_operand:DF 0 "register_operand" "=Y") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "%0") + (match_operand:DF 2 "nonimmediate_operand" "Ym")]))] + "TARGET_SSE2 && TARGET_SSE_MATH + && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set_attr "type" "sse") + (set_attr "mode" "DF")]) -(define_expand "ffssi2" - [(set (match_operand:SI 0 "general_operand" "") - (ffs:SI (match_operand:SI 1 "general_operand" "")))] - "" - " -{ - rtx label = gen_label_rtx (), temp = gen_reg_rtx (SImode); +(define_insn "*fop_xf_comm" + [(set (match_operand:XF 0 "register_operand" "=f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "%0") + (match_operand:XF 2 "register_operand" "f")]))] + "!TARGET_64BIT && TARGET_80387 + && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_tf_comm" + [(set (match_operand:TF 0 "register_operand" "=f") + (match_operator:TF 3 "binary_fp_operator" + [(match_operand:TF 1 "register_operand" "%0") + (match_operand:TF 2 "register_operand" "f")]))] + "TARGET_80387 && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:TF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "XF")]) - emit_insn (gen_ffssi_1 (temp, operands[1])); - emit_cmp_insn (operands[1], const0_rtx, NE, NULL_RTX, SImode, 0, 0); - emit_jump_insn (gen_bne (label)); - emit_move_insn (temp, constm1_rtx); - emit_label (label); - temp = expand_binop (SImode, add_optab, temp, const1_rtx, - operands[0], 0, OPTAB_WIDEN); +(define_insn "*fop_sf_1_nosse" + [(set (match_operand:SF 0 "register_operand" "=f,f") + 
(match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "nonimmediate_operand" "0,fm") + (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && !TARGET_SSE_MATH + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:SF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_sf_1" + [(set (match_operand:SF 0 "register_operand" "=f,f,x") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "nonimmediate_operand" "0,fm,0") + (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm#f")]))] + "TARGET_80387 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "sse") + (match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:SF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_sf_1_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")]))] + "TARGET_SSE_MATH + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" + "* return output_387_binary_op (insn, operands);" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) - if (temp != operands[0]) - emit_move_insn (operands[0], temp); - DONE; -}") +;; ??? Add SSE splitters for these! 
+(define_insn "*fop_sf_2" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (match_operator:SF 3 "binary_fp_operator" + [(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) + (match_operand:SF 2 "register_operand" "0,0")]))] + "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:SF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "ppro_uops" "many") + (set_attr "mode" "SI")]) -(define_insn "ffssi_1" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:SI 1 "nonimmediate_operand" "rm")] 5))] - "" - "* return AS2 (bsf%L0,%1,%0);") +(define_insn "*fop_sf_3" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (match_operator:SF 3 "binary_fp_operator" + [(match_operand:SF 1 "register_operand" "0,0") + (float:SF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH" + "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:SF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:SF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "ppro_uops" "many") + (set_attr "mode" "SI")]) -(define_expand "ffshi2" - [(set (match_operand:SI 0 "general_operand" "") - (ffs:HI (match_operand:HI 1 "general_operand" "")))] - "" - " -{ - rtx label = gen_label_rtx (), temp = gen_reg_rtx (HImode); +(define_insn "*fop_df_1_nosse" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "nonimmediate_operand" "0,fm") + (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && (!TARGET_SSE2 || !TARGET_SSE_MATH) + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "DF")]) + + +(define_insn "*fop_df_1" + [(set (match_operand:DF 0 "register_operand" "=f#Y,f#Y,Y#f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "nonimmediate_operand" "0,fm,0") + (match_operand:DF 2 "nonimmediate_operand" "fm,0,Ym#f")]))] + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387 + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c' + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "sse") + (match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "DF")]) + 
+(define_insn "*fop_df_1_sse" + [(set (match_operand:DF 0 "register_operand" "=Y") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "Ym")]))] + "TARGET_SSE2 && TARGET_SSE_MATH + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" + "* return output_387_binary_op (insn, operands);" + [(set_attr "type" "sse")]) - emit_insn (gen_ffshi_1 (temp, operands[1])); - emit_cmp_insn (operands[1], const0_rtx, NE, NULL_RTX, HImode, 0, 0); - emit_jump_insn (gen_bne (label)); - emit_move_insn (temp, constm1_rtx); - emit_label (label); - temp = expand_binop (HImode, add_optab, temp, const1_rtx, - operands[0], 0, OPTAB_WIDEN); +;; ??? Add SSE splitters for these! +(define_insn "*fop_df_2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) + (match_operand:DF 2 "register_operand" "0,0")]))] + "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "ppro_uops" "many") + (set_attr "mode" "SI")]) - if (temp != operands[0]) - emit_move_insn (operands[0], temp); - DONE; -}") +(define_insn "*fop_df_3" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "0,0") + (float:DF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "ppro_uops" "many") + (set_attr "mode" "SI")]) -(define_insn "ffshi_1" - [(set (match_operand:HI 0 "register_operand" "=r") - (unspec:HI [(match_operand:SI 1 "nonimmediate_operand" "rm")] 5))] - "" - "* return AS2 (bsf%W0,%1,%0);") - -;; These patterns match the binary 387 instructions for addM3, subM3, -;; mulM3 and divM3. There are three patterns for each of DFmode and -;; SFmode. The first is the normal insn, the second the same insn but -;; with one operand a conversion, and the third the same insn but with -;; the other operand a conversion. +(define_insn "*fop_df_4" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) + (match_operand:DF 2 "register_operand" "0,f")]))] + "TARGET_80387 && (!TARGET_SSE2 || !TARGET_SSE_MATH) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) -(define_insn "" +(define_insn "*fop_df_5" [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_387_op" - [(match_operand:DF 1 "nonimmediate_operand" "0,fm") - (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))] - "TARGET_80387" + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "0,f") + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" "* return output_387_binary_op 
(insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "is_mul" "") - (const_string "fpmul") - (match_operand:DF 3 "is_div" "") - (const_string "fpdiv") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") ] - (const_string "fpop") - ) - )]) + (const_string "fop"))) + (set_attr "mode" "SF")]) -(define_insn "" +(define_insn "*fop_xf_1" [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_387_op" + (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,f") (match_operand:XF 2 "register_operand" "f,0")]))] - "TARGET_80387" + "!TARGET_64BIT && TARGET_80387 + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_tf_1" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (match_operator:TF 3 "binary_fp_operator" + [(match_operand:TF 1 "register_operand" "0,f") + (match_operand:TF 2 "register_operand" "f,0")]))] + "TARGET_80387 + && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "is_mul" "") - (const_string "fpmul") - (match_operand:DF 3 "is_div" "") - (const_string "fpdiv") + (cond [(match_operand:TF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:TF 3 "div_operator" "") + (const_string "fdiv") ] - (const_string "fpop") - ) - )]) + (const_string "fop"))) + (set_attr "mode" "XF")]) -(define_insn "" +(define_insn "*fop_xf_2" [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_387_op" + (match_operator:XF 3 "binary_fp_operator" + [(float:XF (match_operand:SI 1 "nonimmediate_operand" 
"m,?r")) + (match_operand:XF 2 "register_operand" "0,0")]))] + "!TARGET_64BIT && TARGET_80387 && TARGET_USE_FIOP" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "SI") + (set_attr "ppro_uops" "many")]) + +(define_insn "*fop_tf_2" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (match_operator:TF 3 "binary_fp_operator" + [(float:TF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) + (match_operand:TF 2 "register_operand" "0,0")]))] + "TARGET_80387 && TARGET_USE_FIOP" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:TF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:TF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "SI") + (set_attr "ppro_uops" "many")]) + +(define_insn "*fop_xf_3" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,0") + (float:XF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] + "!TARGET_64BIT && TARGET_80387 && TARGET_USE_FIOP" + "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "SI") + (set_attr "ppro_uops" "many")]) + +(define_insn "*fop_tf_3" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (match_operator:TF 3 "binary_fp_operator" + [(match_operand:TF 1 "register_operand" "0,0") + (float:TF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && TARGET_USE_FIOP" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:TF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:TF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "SI") + (set_attr "ppro_uops" "many")]) + +(define_insn "*fop_xf_4" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" [(float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) (match_operand:XF 2 "register_operand" "0,f")]))] + "!TARGET_64BIT && TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_tf_4" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (match_operator:TF 3 "binary_fp_operator" + [(float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) + (match_operand:TF 2 "register_operand" "0,f")]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "is_mul" "") - (const_string "fpmul") - (match_operand:DF 3 "is_div" "") - (const_string "fpdiv") + (cond 
[(match_operand:TF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:TF 3 "div_operator" "") + (const_string "fdiv") ] - (const_string "fpop") - ) - )]) + (const_string "fop"))) + (set_attr "mode" "SF")]) -(define_insn "" +(define_insn "*fop_xf_5" [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_387_op" + (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,f") (float_extend:XF (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] - "TARGET_80387" + "!TARGET_64BIT && TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "is_mul" "") - (const_string "fpmul") - (match_operand:DF 3 "is_div" "") - (const_string "fpdiv") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") ] - (const_string "fpop") - ) - )]) - -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (match_operator:DF 3 "binary_387_op" - [(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) - (match_operand:DF 2 "register_operand" "0,f")]))] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_tf_5" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (match_operator:TF 3 "binary_fp_operator" + [(match_operand:TF 1 "register_operand" "0,f") + (float_extend:TF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "is_mul" "") - (const_string "fpmul") - (match_operand:DF 3 "is_div" "") - (const_string "fpdiv") + (cond [(match_operand:TF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:TF 3 "div_operator" "") + (const_string "fdiv") ] - (const_string "fpop") - ) - )]) + (const_string "fop"))) + (set_attr "mode" "SF")]) -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f,f") - 
(match_operator:DF 3 "binary_387_op" - [(match_operand:DF 1 "register_operand" "0,f") - (float_extend:DF - (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] +(define_insn "*fop_xf_6" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,0")) + (match_operand:XF 2 "register_operand" "0,f")]))] + "!TARGET_64BIT && TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "DF")]) + +(define_insn "*fop_tf_6" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (match_operator:TF 3 "binary_fp_operator" + [(float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "fm,0")) + (match_operand:TF 2 "register_operand" "0,f")]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "is_mul" "") - (const_string "fpmul") - (match_operand:DF 3 "is_div" "") - (const_string "fpdiv") + (cond [(match_operand:TF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:TF 3 "div_operator" "") + (const_string "fdiv") ] - (const_string "fpop") - ) - )]) + (const_string "fop"))) + (set_attr "mode" "DF")]) -(define_insn "" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (match_operator:SF 3 "binary_387_op" - [(match_operand:SF 1 "nonimmediate_operand" "0,fm") - (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))] +(define_insn "*fop_xf_7" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (float_extend:XF + (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))] + "!TARGET_64BIT && TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond 
[(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "DF")]) + +(define_insn "*fop_tf_7" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (match_operator:TF 3 "binary_fp_operator" + [(match_operand:TF 1 "register_operand" "0,f") + (float_extend:TF + (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))] "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:DF 3 "is_mul" "") - (const_string "fpmul") - (match_operand:DF 3 "is_div" "") - (const_string "fpdiv") + (cond [(match_operand:TF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:TF 3 "div_operator" "") + (const_string "fdiv") ] - (const_string "fpop") - ) - )]) + (const_string "fop"))) + (set_attr "mode" "DF")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "binary_fp_operator" + [(float (match_operand:SI 1 "register_operand" "")) + (match_operand 2 "register_operand" "")]))] + "TARGET_80387 && reload_completed + && FLOAT_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] +{ + operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); + operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (GET_CODE (operands[3]), + GET_MODE (operands[3]), + operands[4], + operands[2]))); + ix86_free_from_memory (GET_MODE (operands[1])); + DONE; +}) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "binary_fp_operator" + [(match_operand 1 "register_operand" "") + (float (match_operand:SI 2 "register_operand" ""))]))] + "TARGET_80387 && reload_completed + && FLOAT_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] +{ + operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); + operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); + 
emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (GET_CODE (operands[3]), + GET_MODE (operands[3]), + operands[1], + operands[4]))); + ix86_free_from_memory (GET_MODE (operands[2])); + DONE; +}) -(define_expand "strlensi" - [(parallel [(set (match_dup 4) - (unspec:SI [(mem:BLK (match_operand:BLK 1 "general_operand" "")) - (match_operand:QI 2 "immediate_operand" "") - (match_operand:SI 3 "immediate_operand" "")] 0)) - (clobber (match_dup 1))]) - (set (match_dup 5) - (not:SI (match_dup 4))) - (set (match_operand:SI 0 "register_operand" "") - (plus:SI (match_dup 5) - (const_int -1)))] +;; FPU special functions. + +(define_expand "sqrtsf2" + [(set (match_operand:SF 0 "register_operand" "") + (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "")))] + "(! TARGET_NO_FANCY_MATH_387 && TARGET_80387) || TARGET_SSE_MATH" +{ + if (!TARGET_SSE_MATH) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_insn "sqrtsf2_1" + [(set (match_operand:SF 0 "register_operand" "=f#x,x#f") + (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "0#x,xm#f")))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && (TARGET_SSE_MATH && TARGET_MIX_SSE_I387)" + "@ + fsqrt + sqrtss\t{%1, %0|%0, %1}" + [(set_attr "type" "fpspc,sse") + (set_attr "mode" "SF,SF") + (set_attr "athlon_decode" "direct,*")]) + +(define_insn "sqrtsf2_1_sse_only" + [(set (match_operand:SF 0 "register_operand" "=x") + (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH && (!TARGET_80387 || !TARGET_MIX_SSE_I387)" + "sqrtss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "*")]) + +(define_insn "sqrtsf2_i387" + [(set (match_operand:SF 0 "register_operand" "=f") + (sqrt:SF (match_operand:SF 1 "register_operand" "0")))] + "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && !TARGET_SSE_MATH" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "direct")]) + +(define_expand "sqrtdf2" + [(set (match_operand:DF 0 "register_operand" "") + (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "")))] + "(! TARGET_NO_FANCY_MATH_387 && TARGET_80387) + || (TARGET_SSE2 && TARGET_SSE_MATH)" +{ + if (!TARGET_SSE2 || !TARGET_SSE_MATH) + operands[1] = force_reg (DFmode, operands[1]); +}) + +(define_insn "sqrtdf2_1" + [(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f") + (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0#Y,Ym#f")))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && (TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387)" + "@ + fsqrt + sqrtsd\t{%1, %0|%0, %1}" + [(set_attr "type" "fpspc,sse") + (set_attr "mode" "DF,DF") + (set_attr "athlon_decode" "direct,*")]) + +(define_insn "sqrtdf2_1_sse_only" + [(set (match_operand:DF 0 "register_operand" "=Y") + (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "Ym")))] + "TARGET_SSE2 && TARGET_SSE_MATH && (!TARGET_80387 || !TARGET_MIX_SSE_I387)" + "sqrtsd\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "*")]) + +(define_insn "sqrtdf2_i387" + [(set (match_operand:DF 0 "register_operand" "=f") + (sqrt:DF (match_operand:DF 1 "register_operand" "0")))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && (!TARGET_SSE2 || !TARGET_SSE_MATH)" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "*sqrtextendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (sqrt:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] + "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "sqrtxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] + "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 + && (TARGET_IEEE_FP || flag_unsafe_math_optimizations) " + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "sqrttf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (sqrt:TF (match_operand:TF 1 "register_operand" "0")))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && (TARGET_IEEE_FP || flag_unsafe_math_optimizations) " + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct")]) + +;; fsqrt is a "fancy math" 387 insn: enable this only when +;; TARGET_NO_FANCY_MATH_387 is clear, matching sqrtxf2 and *sqrtextenddftf2. +(define_insn "*sqrtextenddfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] + "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "*sqrtextenddftf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (sqrt:TF (float_extend:TF + (match_operand:DF 1 "register_operand" "0"))))] + "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct")]) + +;; fsqrt is a "fancy math" 387 insn: enable this only when +;; TARGET_NO_FANCY_MATH_387 is clear, matching *sqrtextendsftf2. +(define_insn "*sqrtextendsfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] + "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "*sqrtextendsftf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (sqrt:TF (float_extend:TF + (match_operand:SF 1 "register_operand" "0"))))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct")]) + +(define_insn "sindf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 1))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fsin" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_insn "sinsf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 1))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fsin" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF")]) + +(define_insn "*sinextendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(float_extend:DF + (match_operand:SF 1 "register_operand" "0"))] 1))] + "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fsin" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +;; fsin is a "fancy math" 387 insn: enable this only when +;; TARGET_NO_FANCY_MATH_387 is clear, matching sintf2 and sindf2. +(define_insn "sinxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 1))] + "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fsin" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "sintf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 1))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fsin" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "cosdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 1 "register_operand" "0")] 2))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fcos" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_insn "cossf2" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 1 "register_operand" "0")] 2))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fcos" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF")]) + +(define_insn "*cosextendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(float_extend:DF + (match_operand:SF 1 "register_operand" "0"))] 2))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fcos" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_insn "cosxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] 2))] + "! 
TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fcos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "costf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (unspec:TF [(match_operand:TF 1 "register_operand" "0")] 2))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fcos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +;; Block operation instructions + +(define_insn "cld" + [(set (reg:SI 19) (const_int 0))] + "" + "cld" + [(set_attr "type" "cld")]) + +(define_expand "movstrsi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:SI 2 "nonmemory_operand" "")) + (use (match_operand:SI 3 "const_int_operand" ""))] "" - " { - if (TARGET_UNROLL_STRLEN && operands[2] == const0_rtx && optimize > 1) - { - rtx address; - rtx scratch; - - /* well it seems that some optimizer does not combine a call like - foo(strlen(bar), strlen(bar)); - when the move and the subtraction is done here. It does calculate - the length just once when these instructions are done inside of - output_strlen_unroll(). But I think since &bar[strlen(bar)] is - often used and I use one fewer register for the lifetime of - output_strlen_unroll() this is better. */ - scratch = gen_reg_rtx (SImode); - address = force_reg (SImode, XEXP (operands[1], 0)); - - /* move address to scratch-register - this is done here because the i586 can do the following and - in the same cycle with the following move. 
*/ - if (GET_CODE (operands[3]) != CONST_INT || INTVAL (operands[3]) < 4) - emit_insn (gen_movsi (scratch, address)); - - emit_insn (gen_movsi (operands[0], address)); - - if(TARGET_USE_Q_REG) - emit_insn (gen_strlensi_unroll5 (operands[0], - operands[3], - scratch, - operands[0])); - else - emit_insn (gen_strlensi_unroll4 (operands[0], - operands[3], - scratch, - operands[0])); - - /* gen_strlensi_unroll[45] returns the address of the zero - at the end of the string, like memchr(), so compute the - length by subtracting the startaddress. */ - emit_insn (gen_subsi3 (operands[0], operands[0], address)); + if (ix86_expand_movstr (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "movstrdi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:DI 2 "nonmemory_operand" "")) + (use (match_operand:DI 3 "const_int_operand" ""))] + "TARGET_64BIT" +{ + if (ix86_expand_movstr (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +;; Most CPUs don't like single string operations +;; Handle this case here to simplify previous expander. 
+ +(define_expand "strmovdi_rex64" + [(set (match_dup 2) + (mem:DI (match_operand:DI 1 "register_operand" ""))) + (set (mem:DI (match_operand:DI 0 "register_operand" "")) + (match_dup 2)) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 8))) + (clobber (reg:CC 17))])] + "TARGET_64BIT" +{ + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strmovdi_rex_1 (operands[0], operands[1], operands[0], + operands[1])); DONE; } + else + operands[2] = gen_reg_rtx (DImode); +}) - operands[1] = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); - operands[4] = gen_reg_rtx (SImode); - operands[5] = gen_reg_rtx (SImode); -}") -;; It might seem that operands 0 & 1 could use predicate register_operand. -;; But strength reduction might offset the MEM expression. So we let -;; reload put the address into %edi. +(define_expand "strmovsi" + [(set (match_dup 2) + (mem:SI (match_operand:SI 1 "register_operand" ""))) + (set (mem:SI (match_operand:SI 0 "register_operand" "")) + (match_dup 2)) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 4))) + (clobber (reg:CC 17))])] + "" +{ + if (TARGET_64BIT) + { + emit_insn (gen_strmovsi_rex64 (operands[0], operands[1])); + DONE; + } + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strmovsi_1 (operands[0], operands[1], operands[0], + operands[1])); + DONE; + } + else + operands[2] = gen_reg_rtx (SImode); +}) + +(define_expand "strmovsi_rex64" + [(set (match_dup 2) + (mem:SI (match_operand:DI 1 "register_operand" ""))) + (set (mem:SI (match_operand:DI 0 "register_operand" "")) + (match_dup 2)) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 4))) + (clobber (reg:CC 17))])] + 
"TARGET_64BIT" +{ + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strmovsi_rex_1 (operands[0], operands[1], operands[0], + operands[1])); + DONE; + } + else + operands[2] = gen_reg_rtx (SImode); +}) -(define_insn "" - [(set (match_operand:SI 0 "register_operand" "=&c") - (unspec:SI [(mem:BLK (match_operand:SI 1 "address_operand" "D")) - (match_operand:QI 2 "immediate_operand" "a") - (match_operand:SI 3 "immediate_operand" "i")] 0)) - (clobber (match_dup 1))] +(define_expand "strmovhi" + [(set (match_dup 2) + (mem:HI (match_operand:SI 1 "register_operand" ""))) + (set (mem:HI (match_operand:SI 0 "register_operand" "")) + (match_dup 2)) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 2))) + (clobber (reg:CC 17))])] "" - "* { - rtx xops[2]; + if (TARGET_64BIT) + { + emit_insn (gen_strmovhi_rex64 (operands[0], operands[1])); + DONE; + } + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strmovhi_1 (operands[0], operands[1], operands[0], + operands[1])); + DONE; + } + else + operands[2] = gen_reg_rtx (HImode); +}) + +(define_expand "strmovhi_rex64" + [(set (match_dup 2) + (mem:HI (match_operand:DI 1 "register_operand" ""))) + (set (mem:HI (match_operand:DI 0 "register_operand" "")) + (match_dup 2)) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 2))) + (clobber (reg:CC 17))])] + "TARGET_64BIT" +{ + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strmovhi_rex_1 (operands[0], operands[1], operands[0], + operands[1])); + DONE; + } + else + operands[2] = gen_reg_rtx (HImode); +}) - xops[0] = operands[0]; - xops[1] = constm1_rtx; - output_asm_insn (\"cld\", operands); - output_asm_insn (AS2 (mov%L0,%1,%0), xops); - return \"repnz\;scas%B2\"; -}") +(define_expand "strmovqi" + [(set (match_dup 2) 
+ (mem:QI (match_operand:SI 1 "register_operand" ""))) + (set (mem:QI (match_operand:SI 0 "register_operand" "")) + (match_dup 2)) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 1))) + (clobber (reg:CC 17))])] + "" +{ + if (TARGET_64BIT) + { + emit_insn (gen_strmovqi_rex64 (operands[0], operands[1])); + DONE; + } + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strmovqi_1 (operands[0], operands[1], operands[0], + operands[1])); + DONE; + } + else + operands[2] = gen_reg_rtx (QImode); +}) -/* Conditional move define_insns. */ +(define_expand "strmovqi_rex64" + [(set (match_dup 2) + (mem:QI (match_operand:DI 1 "register_operand" ""))) + (set (mem:QI (match_operand:DI 0 "register_operand" "")) + (match_dup 2)) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 1))) + (clobber (reg:CC 17))])] + "TARGET_64BIT" +{ + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strmovqi_rex_1 (operands[0], operands[1], operands[0], + operands[1])); + DONE; + } + else + operands[2] = gen_reg_rtx (QImode); +}) + +(define_insn "strmovdi_rex_1" + [(set (mem:DI (match_operand:DI 2 "register_operand" "0")) + (mem:DI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 8))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 8))) + (use (reg:SI 19))] + "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "movsq" + [(set_attr "type" "str") + (set_attr "mode" "DI") + (set_attr "memory" "both")]) + +(define_insn "strmovsi_1" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI 
(match_dup 2) + (const_int 4))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (match_dup 3) + (const_int 4))) + (use (reg:SI 19))] + "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "{movsl|movsd}" + [(set_attr "type" "str") + (set_attr "mode" "SI") + (set_attr "memory" "both")]) + +(define_insn "strmovsi_rex_1" + [(set (mem:SI (match_operand:DI 2 "register_operand" "0")) + (mem:SI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 4))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 4))) + (use (reg:SI 19))] + "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "{movsl|movsd}" + [(set_attr "type" "str") + (set_attr "mode" "SI") + (set_attr "memory" "both")]) + +(define_insn "strmovhi_1" + [(set (mem:HI (match_operand:SI 2 "register_operand" "0")) + (mem:HI (match_operand:SI 3 "register_operand" "1"))) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 2) + (const_int 2))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (match_dup 3) + (const_int 2))) + (use (reg:SI 19))] + "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "movsw" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "HI")]) + +(define_insn "strmovhi_rex_1" + [(set (mem:HI (match_operand:DI 2 "register_operand" "0")) + (mem:HI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 2))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 2))) + (use (reg:SI 19))] + "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "movsw" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "HI")]) + +(define_insn "strmovqi_1" + [(set (mem:QI (match_operand:SI 2 "register_operand" "0")) + (mem:QI (match_operand:SI 3 
"register_operand" "1"))) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 2) + (const_int 1))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (match_dup 3) + (const_int 1))) + (use (reg:SI 19))] + "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "movsb" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "QI")]) + +(define_insn "strmovqi_rex_1" + [(set (mem:QI (match_operand:DI 2 "register_operand" "0")) + (mem:QI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 1))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 1))) + (use (reg:SI 19))] + "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "movsb" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "QI")]) + +(define_insn "rep_movdi_rex64" + [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") + (const_int 3)) + (match_operand:DI 3 "register_operand" "0"))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (ashift:DI (match_dup 5) (const_int 3)) + (match_operand:DI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5)) + (use (reg:SI 19))] + "TARGET_64BIT" + "{rep\;movsq|rep movsq}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "DI")]) + +(define_insn "rep_movsi" + [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2") + (const_int 2)) + (match_operand:SI 3 "register_operand" "0"))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (ashift:SI (match_dup 5) (const_int 2)) + 
(match_operand:SI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5)) + (use (reg:SI 19))] + "!TARGET_64BIT" + "{rep\;movsl|rep movsd}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_insn "rep_movsi_rex64" + [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") + (const_int 2)) + (match_operand:DI 3 "register_operand" "0"))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (ashift:DI (match_dup 5) (const_int 2)) + (match_operand:DI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5)) + (use (reg:SI 19))] + "TARGET_64BIT" + "{rep\;movsl|rep movsd}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_insn "rep_movqi" + [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_operand:SI 3 "register_operand" "0") + (match_operand:SI 5 "register_operand" "2"))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (match_operand:SI 4 "register_operand" "1") (match_dup 5))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5)) + (use (reg:SI 19))] + "!TARGET_64BIT" + "{rep\;movsb|rep movsb}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_insn "rep_movqi_rex64" + [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_operand:DI 3 "register_operand" "0") + (match_operand:DI 5 "register_operand" "2"))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_operand:DI 4 "register_operand" "1") 
(match_dup 5))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5)) + (use (reg:SI 19))] + "TARGET_64BIT" + "{rep\;movsb|rep movsb}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) -(define_expand "movsicc" +(define_expand "clrstrsi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:SI 1 "nonmemory_operand" "")) + (use (match_operand 2 "const_int_operand" ""))] + "" +{ + if (ix86_expand_clrstr (operands[0], operands[1], operands[2])) + DONE; + else + FAIL; +}) + +(define_expand "clrstrdi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:DI 1 "nonmemory_operand" "")) + (use (match_operand 2 "const_int_operand" ""))] + "TARGET_64BIT" +{ + if (ix86_expand_clrstr (operands[0], operands[1], operands[2])) + DONE; + else + FAIL; +}) + +;; Most CPUs don't like single string operations +;; Handle this case here to simplify previous expander. + +(define_expand "strsetdi_rex64" + [(set (mem:DI (match_operand:DI 0 "register_operand" "")) + (match_operand:DI 1 "register_operand" "")) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8))) + (clobber (reg:CC 17))])] + "TARGET_64BIT" +{ + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strsetdi_rex_1 (operands[0], operands[0], operands[1])); + DONE; + } +}) + +(define_expand "strsetsi" + [(set (mem:SI (match_operand:SI 0 "register_operand" "")) + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4))) + (clobber (reg:CC 17))])] + "" +{ + if (TARGET_64BIT) + { + emit_insn (gen_strsetsi_rex64 (operands[0], operands[1])); + DONE; + } + else if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strsetsi_1 (operands[0], operands[0], operands[1])); + DONE; + } +}) + +(define_expand "strsetsi_rex64" + [(set (mem:SI (match_operand:DI 0 "register_operand" "")) + (match_operand:SI 1 
"register_operand" "")) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4))) + (clobber (reg:CC 17))])] + "TARGET_64BIT" +{ + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strsetsi_rex_1 (operands[0], operands[0], operands[1])); + DONE; + } +}) + +(define_expand "strsethi" + [(set (mem:HI (match_operand:SI 0 "register_operand" "")) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2))) + (clobber (reg:CC 17))])] + "" +{ + if (TARGET_64BIT) + { + emit_insn (gen_strsethi_rex64 (operands[0], operands[1])); + DONE; + } + else if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strsethi_1 (operands[0], operands[0], operands[1])); + DONE; + } +}) + +(define_expand "strsethi_rex64" + [(set (mem:HI (match_operand:DI 0 "register_operand" "")) + (match_operand:HI 1 "register_operand" "")) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2))) + (clobber (reg:CC 17))])] + "TARGET_64BIT" +{ + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strsethi_rex_1 (operands[0], operands[0], operands[1])); + DONE; + } +}) + +(define_expand "strsetqi" + [(set (mem:QI (match_operand:SI 0 "register_operand" "")) + (match_operand:QI 1 "register_operand" "")) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC 17))])] + "" +{ + if (TARGET_64BIT) + { + emit_insn (gen_strsetqi_rex64 (operands[0], operands[1])); + DONE; + } + else if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strsetqi_1 (operands[0], operands[0], operands[1])); + DONE; + } +}) + +(define_expand "strsetqi_rex64" + [(set (mem:QI (match_operand:DI 0 "register_operand" "")) + (match_operand:QI 1 "register_operand" "")) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) + (clobber (reg:CC 17))])] + "TARGET_64BIT" +{ + if (TARGET_SINGLE_STRINGOP || optimize_size) + { + emit_insn (gen_strsetqi_rex_1 (operands[0], 
operands[0], operands[1])); + DONE; + } +}) + +(define_insn "strsetdi_rex_1" + [(set (mem:DI (match_operand:DI 1 "register_operand" "0")) + (match_operand:DI 2 "register_operand" "a")) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 1) + (const_int 8))) + (use (reg:SI 19))] + "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "stosq" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +(define_insn "strsetsi_1" + [(set (mem:SI (match_operand:SI 1 "register_operand" "0")) + (match_operand:SI 2 "register_operand" "a")) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 1) + (const_int 4))) + (use (reg:SI 19))] + "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "{stosl|stosd}" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "strsetsi_rex_1" + [(set (mem:SI (match_operand:DI 1 "register_operand" "0")) + (match_operand:SI 2 "register_operand" "a")) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 1) + (const_int 4))) + (use (reg:SI 19))] + "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "{stosl|stosd}" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "strsethi_1" + [(set (mem:HI (match_operand:SI 1 "register_operand" "0")) + (match_operand:HI 2 "register_operand" "a")) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 1) + (const_int 2))) + (use (reg:SI 19))] + "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "stosw" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "HI")]) + +(define_insn "strsethi_rex_1" + [(set (mem:HI (match_operand:DI 1 "register_operand" "0")) + (match_operand:HI 2 "register_operand" "a")) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 1) + (const_int 2))) + (use (reg:SI 19))] + "TARGET_64BIT &&
(TARGET_SINGLE_STRINGOP || optimize_size)" + "stosw" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "HI")]) + +(define_insn "strsetqi_1" + [(set (mem:QI (match_operand:SI 1 "register_operand" "0")) + (match_operand:QI 2 "register_operand" "a")) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 1) + (const_int 1))) + (use (reg:SI 19))] + "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "stosb" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_insn "strsetqi_rex_1" + [(set (mem:QI (match_operand:DI 1 "register_operand" "0")) + (match_operand:QI 2 "register_operand" "a")) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 1) + (const_int 1))) + (use (reg:SI 19))] + "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)" + "stosb" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_insn "rep_stosdi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") + (const_int 3)) + (match_operand:DI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:DI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI 19))] + "TARGET_64BIT" + "{rep\;stosq|rep stosq}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +(define_insn "rep_stossi" + [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1") + (const_int 2)) + (match_operand:SI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:SI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI 19))] + "!TARGET_64BIT" + 
"{rep\;stosl|rep stosd}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "rep_stossi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") + (const_int 2)) + (match_operand:DI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:SI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI 19))] + "TARGET_64BIT" + "{rep\;stosl|rep stosd}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "rep_stosqi" + [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_operand:SI 3 "register_operand" "0") + (match_operand:SI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:QI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI 19))] + "!TARGET_64BIT" + "{rep\;stosb|rep stosb}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_insn "rep_stosqi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_operand:DI 3 "register_operand" "0") + (match_operand:DI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:QI 2 "register_operand" "a")) + (use (match_dup 4)) + (use (reg:SI 19))] + "TARGET_64BIT" + "{rep\;stosb|rep stosb}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_expand "cmpstrsi" [(set (match_operand:SI 0 "register_operand" "") - (if_then_else:SI (match_operand 1 "comparison_operator" "") - (match_operand:SI
2 "nonimmediate_operand" "") - (match_operand:SI 3 "nonimmediate_operand" "")))] - "TARGET_CMOVE" - " + (compare:SI (match_operand:BLK 1 "general_operand" "") + (match_operand:BLK 2 "general_operand" ""))) + (use (match_operand 3 "general_operand" "")) + (use (match_operand 4 "immediate_operand" ""))] + "" { - if (GET_MODE_CLASS (GET_MODE (i386_compare_op0)) != MODE_INT) - FAIL; + rtx addr1, addr2, out, outlow, count, countreg, align; - operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), - GET_MODE (i386_compare_op0), - i386_compare_op0, i386_compare_op1); -}") + out = operands[0]; + if (GET_CODE (out) != REG) + out = gen_reg_rtx (SImode); -(define_insn "" - [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") - (if_then_else:SI (match_operator 1 "comparison_operator" - [(match_operand:QI 2 "nonimmediate_operand" "q,m,q,m") - (match_operand:QI 3 "general_operand" "qmn,qn,qmn,qn")]) - (match_operand:SI 4 "nonimmediate_operand" "rm,rm,0,0") - (match_operand:SI 5 "nonimmediate_operand" "0,0,rm,rm")))] - "TARGET_CMOVE" - "#") + addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0)); + + count = operands[3]; + countreg = ix86_zero_extend_to_Pmode (count); -(define_insn "" - [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") - (if_then_else:SI (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "r,m,r,m") - (match_operand 3 "general_operand" "rmi,ri,rmi,ri")]) - (match_operand:SI 4 "nonimmediate_operand" "rm,rm,0,0") - (match_operand:SI 5 "nonimmediate_operand" "0,0,rm,rm")))] - "TARGET_CMOVE && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT" - "#") + /* %%% Iff we are testing strict equality, we can use known alignment + to good advantage. This may be possible with combine, particularly + once cc0 is dead. 
*/ + align = operands[4]; -(define_split + emit_insn (gen_cld ()); + if (GET_CODE (count) == CONST_INT) + { + if (INTVAL (count) == 0) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (TARGET_64BIT) + emit_insn (gen_cmpstrqi_nz_rex_1 (addr1, addr2, countreg, align, + addr1, addr2, countreg)); + else + emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align, + addr1, addr2, countreg)); + } + else + { + if (TARGET_64BIT) + { + emit_insn (gen_cmpdi_1_rex64 (countreg, countreg)); + emit_insn (gen_cmpstrqi_rex_1 (addr1, addr2, countreg, align, + addr1, addr2, countreg)); + } + else + { + emit_insn (gen_cmpsi_1 (countreg, countreg)); + emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align, + addr1, addr2, countreg)); + } + } + + outlow = gen_lowpart (QImode, out); + emit_insn (gen_cmpintqi (outlow)); + emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow)); + + if (operands[0] != out) + emit_move_insn (operands[0], out); + + DONE; +}) + +;; Produce a tri-state integer (-1, 0, 1) from condition codes. + +(define_expand "cmpintqi" + [(set (match_dup 1) + (gtu:QI (reg:CC 17) (const_int 0))) + (set (match_dup 2) + (ltu:QI (reg:CC 17) (const_int 0))) + (parallel [(set (match_operand:QI 0 "register_operand" "") + (minus:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "" + "operands[1] = gen_reg_rtx (QImode); + operands[2] = gen_reg_rtx (QImode);") + +;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is +;; zero. Emit extra code to make sure that a zero-length compare is EQ. 
+ +(define_insn "cmpstrqi_nz_1" + [(set (reg:CC 17) + (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) + (mem:BLK (match_operand:SI 5 "register_operand" "1")))) + (use (match_operand:SI 6 "register_operand" "2")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:SI 19)) + (clobber (match_operand:SI 0 "register_operand" "=S")) + (clobber (match_operand:SI 1 "register_operand" "=D")) + (clobber (match_operand:SI 2 "register_operand" "=c"))] + "!TARGET_64BIT" + "repz{\;| }cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_insn "cmpstrqi_nz_rex_1" + [(set (reg:CC 17) + (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) + (mem:BLK (match_operand:DI 5 "register_operand" "1")))) + (use (match_operand:DI 6 "register_operand" "2")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:SI 19)) + (clobber (match_operand:DI 0 "register_operand" "=S")) + (clobber (match_operand:DI 1 "register_operand" "=D")) + (clobber (match_operand:DI 2 "register_operand" "=c"))] + "TARGET_64BIT" + "repz{\;| }cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +;; The same, but the count is not known to not be zero. 
+ +(define_insn "cmpstrqi_1" + [(set (reg:CC 17) + (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2") + (const_int 0)) + (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) + (mem:BLK (match_operand:SI 5 "register_operand" "1"))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:CC 17)) + (use (reg:SI 19)) + (clobber (match_operand:SI 0 "register_operand" "=S")) + (clobber (match_operand:SI 1 "register_operand" "=D")) + (clobber (match_operand:SI 2 "register_operand" "=c"))] + "!TARGET_64BIT" + "repz{\;| }cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_insn "cmpstrqi_rex_1" + [(set (reg:CC 17) + (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2") + (const_int 0)) + (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) + (mem:BLK (match_operand:DI 5 "register_operand" "1"))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:CC 17)) + (use (reg:SI 19)) + (clobber (match_operand:DI 0 "register_operand" "=S")) + (clobber (match_operand:DI 1 "register_operand" "=D")) + (clobber (match_operand:DI 2 "register_operand" "=c"))] + "TARGET_64BIT" + "repz{\;| }cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_expand "strlensi" [(set (match_operand:SI 0 "register_operand" "") - (if_then_else:SI (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (const_int 0)]) - (match_operand:SI 3 "nonimmediate_operand" "") - (match_operand:SI 4 "nonimmediate_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) - (match_dup 2)) - (set (match_dup 0) - (if_then_else:SI (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 3) (match_dup 4)))] + (unspec:SI [(match_operand:BLK 1 "general_operand" "") + (match_operand:QI 2 "immediate_operand" "") + (match_operand 3 "immediate_operand" "")] 0))] + "" +{ + if (ix86_expand_strlen 
(operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "strlendi" + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:BLK 1 "general_operand" "") + (match_operand:QI 2 "immediate_operand" "") + (match_operand 3 "immediate_operand" "")] 0))] + "" +{ + if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_insn "strlenqi_1" + [(set (match_operand:SI 0 "register_operand" "=&c") + (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1")) + (match_operand:QI 2 "register_operand" "a") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "register_operand" "0")] 0)) + (use (reg:SI 19)) + (clobber (match_operand:SI 1 "register_operand" "=D")) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "repnz{\;| }scasb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_insn "strlenqi_rex_1" + [(set (match_operand:DI 0 "register_operand" "=&c") + (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1")) + (match_operand:QI 2 "register_operand" "a") + (match_operand:DI 3 "immediate_operand" "i") + (match_operand:DI 4 "register_operand" "0")] 0)) + (use (reg:SI 19)) + (clobber (match_operand:DI 1 "register_operand" "=D")) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "repnz{\;| }scasb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +;; Peephole optimizations to clean up after cmpstr*. This should be +;; handled in combine, but it is not currently up to the task. +;; When used for their truth value, the cmpstr* expanders generate +;; code like this: +;; +;; repz cmpsb +;; seta %al +;; setb %dl +;; cmpb %al, %dl +;; jcc label +;; +;; The intermediate three instructions are unnecessary. + +;; This one handles cmpstr*_nz_1... 
+(define_peephole2 + [(parallel[ + (set (reg:CC 17) + (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) + (mem:BLK (match_operand 5 "register_operand" "")))) + (use (match_operand 6 "register_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:SI 19)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))]) + (set (match_operand:QI 7 "register_operand" "") + (gtu:QI (reg:CC 17) (const_int 0))) + (set (match_operand:QI 8 "register_operand" "") + (ltu:QI (reg:CC 17) (const_int 0))) + (set (reg 17) + (compare (match_dup 7) (match_dup 8))) + ] + "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" + [(parallel[ + (set (reg:CC 17) + (compare:CC (mem:BLK (match_dup 4)) + (mem:BLK (match_dup 5)))) + (use (match_dup 6)) + (use (match_dup 3)) + (use (reg:SI 19)) + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] "") -(define_split - [(set (match_operand:SI 0 "register_operand" "") - (if_then_else:SI (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (match_operand 3 "general_operand" "")]) - (match_operand:SI 4 "nonimmediate_operand" "") - (match_operand:SI 5 "nonimmediate_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) (compare (match_dup 2) (match_dup 3))) - (set (match_dup 0) - (if_then_else:SI (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 4) (match_dup 5)))] +;; ...and this one handles cmpstr*_1. 
+(define_peephole2 + [(parallel[ + (set (reg:CC 17) + (if_then_else:CC (ne (match_operand 6 "register_operand" "") + (const_int 0)) + (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) + (mem:BLK (match_operand 5 "register_operand" ""))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:CC 17)) + (use (reg:SI 19)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))]) + (set (match_operand:QI 7 "register_operand" "") + (gtu:QI (reg:CC 17) (const_int 0))) + (set (match_operand:QI 8 "register_operand" "") + (ltu:QI (reg:CC 17) (const_int 0))) + (set (reg 17) + (compare (match_dup 7) (match_dup 8))) + ] + "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" + [(parallel[ + (set (reg:CC 17) + (if_then_else:CC (ne (match_dup 6) + (const_int 0)) + (compare:CC (mem:BLK (match_dup 4)) + (mem:BLK (match_dup 5))) + (const_int 0))) + (use (match_dup 3)) + (use (reg:CC 17)) + (use (reg:SI 19)) + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] "") -(define_insn "" + + +;; Conditional move instructions. + +(define_expand "movdicc" + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "general_operand" "") + (match_operand:DI 3 "general_operand" "")))] + "TARGET_64BIT" + "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + +(define_insn "x86_movdicc_0_m1_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (if_then_else:DI (ltu (reg:CC 17) (const_int 0)) + (const_int -1) + (const_int 0))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "sbb{q}\t%0, %0" + ; Since we don't have the proper number of operands for an alu insn, + ; fill in all the blanks. 
+ [(set_attr "type" "alu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "DI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movdicc_c_rex64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (if_then_else:DI (match_operator 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) + (match_operand:DI 2 "nonimmediate_operand" "rm,0") + (match_operand:DI 3 "nonimmediate_operand" "0,rm")))] + "TARGET_64BIT && TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + cmov%C1\t{%2, %0|%0, %2} + cmov%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "DI")]) + +(define_expand "movsicc" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "general_operand" "") + (match_operand:SI 3 "general_operand" "")))] + "" + "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + +;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing +;; the register first winds up with `sbbl $0,reg', which is also weird. +;; So just document what we're doing explicitly. + +(define_insn "x86_movsicc_0_m1" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI (ltu (reg:CC 17) (const_int 0)) + (const_int -1) + (const_int 0))) + (clobber (reg:CC 17))] + "" + "sbb{l}\t%0, %0" + ; Since we don't have the proper number of operands for an alu insn, + ; fill in all the blanks. 
+ [(set_attr "type" "alu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movsicc_noc" [(set (match_operand:SI 0 "register_operand" "=r,r") - (if_then_else:SI (match_operator 1 "comparison_operator" - [(cc0) (const_int 0)]) + (if_then_else:SI (match_operator 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) (match_operand:SI 2 "nonimmediate_operand" "rm,0") (match_operand:SI 3 "nonimmediate_operand" "0,rm")))] - "TARGET_CMOVE && reload_completed" - "* return output_int_conditional_move (which_alternative, operands);") + "TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + cmov%C1\t{%2, %0|%0, %2} + cmov%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) (define_expand "movhicc" [(set (match_operand:HI 0 "register_operand" "") (if_then_else:HI (match_operand 1 "comparison_operator" "") (match_operand:HI 2 "nonimmediate_operand" "") (match_operand:HI 3 "nonimmediate_operand" "")))] - "TARGET_CMOVE" - " -{ - if (GET_MODE_CLASS (GET_MODE (i386_compare_op0)) != MODE_INT) - FAIL; - - operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), - GET_MODE (i386_compare_op0), - i386_compare_op0, i386_compare_op1); -}") - -(define_insn "" - [(set (match_operand:HI 0 "register_operand" "=r,r,r,r") - (if_then_else:HI (match_operator 1 "comparison_operator" - [(match_operand:QI 2 "nonimmediate_operand" "q,m,q,m") - (match_operand:QI 3 "general_operand" "qmn,qn,qmn,qn")]) - (match_operand:HI 4 "nonimmediate_operand" "rm,rm,0,0") - (match_operand:HI 5 "nonimmediate_operand" "0,0,rm,rm")))] - "TARGET_CMOVE" - "#") + "TARGET_CMOVE && TARGET_HIMODE_MATH" + "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") -(define_insn "" - [(set (match_operand:HI 0 "register_operand" "=r,r,r,r") - (if_then_else:HI (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "r,m,r,m") - (match_operand 3 
"general_operand" "rmi,ri,rmi,ri")]) - (match_operand:HI 4 "nonimmediate_operand" "rm,rm,0,0") - (match_operand:HI 5 "nonimmediate_operand" "0,0,rm,rm")))] - "TARGET_CMOVE && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT" - "#") - -(define_split - [(set (match_operand:HI 0 "register_operand" "") - (if_then_else:HI (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (const_int 0)]) - (match_operand:HI 3 "nonimmediate_operand" "") - (match_operand:HI 4 "nonimmediate_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) - (match_dup 2)) - (set (match_dup 0) - (if_then_else:HI (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 3) (match_dup 4)))] - "") - -(define_split - [(set (match_operand:HI 0 "register_operand" "") - (if_then_else:HI (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (match_operand 3 "general_operand" "")]) - (match_operand:HI 4 "nonimmediate_operand" "") - (match_operand:HI 5 "nonimmediate_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) - (compare (match_dup 2) (match_dup 3))) - (set (match_dup 0) - (if_then_else:HI (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 4) (match_dup 5)))] - "") - -(define_insn "" +(define_insn "*movhicc_noc" [(set (match_operand:HI 0 "register_operand" "=r,r") - (if_then_else:HI (match_operator 1 "comparison_operator" - [(cc0) (const_int 0)]) + (if_then_else:HI (match_operator 1 "ix86_comparison_operator" + [(reg 17) (const_int 0)]) (match_operand:HI 2 "nonimmediate_operand" "rm,0") (match_operand:HI 3 "nonimmediate_operand" "0,rm")))] - "TARGET_CMOVE && reload_completed" - "* return output_int_conditional_move (which_alternative, operands);") + "TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + cmov%C1\t{%2, %0|%0, %2} + cmov%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "HI")]) (define_expand "movsfcc" [(set (match_operand:SF 0 
"register_operand" "") @@ -7754,425 +15825,830 @@ byte_xor_operation: (match_operand:SF 2 "register_operand" "") (match_operand:SF 3 "register_operand" "")))] "TARGET_CMOVE" - " -{ - rtx temp; - - if (GET_MODE_CLASS (GET_MODE (i386_compare_op0)) != MODE_INT) - FAIL; - - /* The floating point conditional move instructions don't directly - support conditions resulting from a signed integer comparison. */ + "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") + +(define_insn "*movsfcc_1" + [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" + [(reg 17) (const_int 0)]) + (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + cmov%C1\t{%2, %0|%0, %2} + cmov%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "SF,SF,SI,SI")]) - switch (GET_CODE (operands[1])) - { - case LT: - case LE: - case GE: - case GT: - temp = emit_store_flag (gen_reg_rtx (QImode), - GET_CODE (operands[1]), i386_compare_op0, i386_compare_op1, - VOIDmode, 0, 0); +(define_expand "movdfcc" + [(set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (match_operand 1 "comparison_operator" "") + (match_operand:DF 2 "register_operand" "") + (match_operand:DF 3 "register_operand" "")))] + "TARGET_CMOVE" + "if (! 
ix86_expand_fp_movcc (operands)) FAIL; DONE;") + +(define_insn "*movdfcc_1" + [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg 17) (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "!TARGET_64BIT && TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + # + #" + [(set_attr "type" "fcmov,fcmov,multi,multi") + (set_attr "mode" "DF")]) + +(define_insn "*movdfcc_1_rex64" + [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg 17) (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_64BIT && TARGET_CMOVE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + cmov%C1\t{%2, %0|%0, %2} + cmov%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "DF")]) - if (!temp) - FAIL; +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(match_operand 4 "" "") (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "") + (match_operand:DF 3 "nonimmediate_operand" "")))] + "!TARGET_64BIT && !ANY_FP_REG_P (operands[0]) && reload_completed" + [(set (match_dup 2) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 5) + (match_dup 7))) + (set (match_dup 3) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 6) + (match_dup 8)))] + "split_di (operands+2, 1, operands+5, operands+6); + split_di (operands+3, 1, operands+7, operands+8); + split_di (operands, 1, operands+2, operands+3);") - operands[1] = gen_rtx_fmt_ee (NE, 
QImode, temp, const0_rtx); - break; +(define_expand "movxfcc" + [(set (match_operand:XF 0 "register_operand" "") + (if_then_else:XF (match_operand 1 "comparison_operator" "") + (match_operand:XF 2 "register_operand" "") + (match_operand:XF 3 "register_operand" "")))] + "!TARGET_64BIT && TARGET_CMOVE" + "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") + +(define_expand "movtfcc" + [(set (match_operand:TF 0 "register_operand" "") + (if_then_else:TF (match_operand 1 "comparison_operator" "") + (match_operand:TF 2 "register_operand" "") + (match_operand:TF 3 "register_operand" "")))] + "TARGET_CMOVE" + "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") - default: - operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), - GET_MODE (i386_compare_op0), - i386_compare_op0, i386_compare_op1); - break; - } -}") +(define_insn "*movxfcc_1" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" + [(reg 17) (const_int 0)]) + (match_operand:XF 2 "register_operand" "f,0") + (match_operand:XF 3 "register_operand" "0,f")))] + "!TARGET_64BIT && TARGET_CMOVE" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov") + (set_attr "mode" "XF")]) + +(define_insn "*movtfcc_1" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (if_then_else:TF (match_operator 1 "fcmov_comparison_operator" + [(reg 17) (const_int 0)]) + (match_operand:TF 2 "register_operand" "f,0") + (match_operand:TF 3 "register_operand" "0,f")))] + "TARGET_CMOVE" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov") + (set_attr "mode" "XF")]) + +(define_expand "minsf3" + [(parallel [ + (set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "nonimmediate_operand" "")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "TARGET_SSE" + "") -(define_insn "" - [(set (match_operand:SF 0 
"register_operand" "=f,f,f,f") - (if_then_else:SF (match_operator 1 "comparison_operator" - [(match_operand:QI 2 "nonimmediate_operand" "q,m,q,m") - (match_operand:QI 3 "general_operand" "qmn,qn,qmn,qn")]) - (match_operand:SF 4 "register_operand" "f,f,0,0") - (match_operand:SF 5 "register_operand" "0,0,f,f")))] - "TARGET_CMOVE - && GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != LE - && GET_CODE (operands[1]) != GE && GET_CODE (operands[1]) != GT" +(define_insn "*minsf" + [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x") + (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0,0,f#x") + (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x,0")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_SSE && TARGET_IEEE_FP" "#") -(define_insn "" - [(set (match_operand:SF 0 "register_operand" "=f,f,f,f") - (if_then_else:SF (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "r,m,r,m") - (match_operand 3 "general_operand" "rmi,ri,rmi,ri")]) - (match_operand:SF 4 "register_operand" "f,f,0,0") - (match_operand:SF 5 "register_operand" "0,0,f,f")))] - "TARGET_CMOVE && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT - && GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != LE - && GET_CODE (operands[1]) != GE && GET_CODE (operands[1]) != GT" +(define_insn "*minsf_nonieee" + [(set (match_operand:SF 0 "register_operand" "=x#f,f#x") + (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "%0,0") + (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_SSE && !TARGET_IEEE_FP" "#") (define_split [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (const_int 0)]) - (match_operand:SF 3 "register_operand" "") - (match_operand:SF 4 "register_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) - (match_dup 2)) - 
(set (match_dup 0) - (if_then_else:SF (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 3) (match_dup 4)))] - "") + (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "nonimmediate_operand" "")) + (match_operand:SF 3 "register_operand" "") + (match_operand:SF 4 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))] + "SSE_REG_P (operands[0]) && reload_completed + && ((operands_match_p (operands[1], operands[3]) + && operands_match_p (operands[2], operands[4])) + || (operands_match_p (operands[1], operands[4]) + && operands_match_p (operands[2], operands[3])))" + [(set (match_dup 0) + (if_then_else:SF (lt (match_dup 1) + (match_dup 2)) + (match_dup 1) + (match_dup 2)))]) + +;; We can't represent the LT test directly. Do this by swapping the operands. (define_split [(set (match_operand:SF 0 "register_operand" "") - (if_then_else:SF (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (match_operand 3 "general_operand" "")]) - (match_operand:SF 4 "register_operand" "") - (match_operand:SF 5 "register_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) (compare (match_dup 2) (match_dup 3))) + (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "register_operand" "")) + (match_operand:SF 3 "register_operand" "") + (match_operand:SF 4 "register_operand" ""))) + (clobber (reg:CC 17))] + "FP_REG_P (operands[0]) && reload_completed + && ((operands_match_p (operands[1], operands[3]) + && operands_match_p (operands[2], operands[4])) + || (operands_match_p (operands[1], operands[4]) + && operands_match_p (operands[2], operands[3])))" + [(set (reg:CCFP 17) + (compare:CCFP (match_dup 2) + (match_dup 1))) (set (match_dup 0) - (if_then_else:SF (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 4) (match_dup 5)))] - "") + (if_then_else:SF (ge (reg:CCFP 17) (const_int 0)) + (match_dup 1) + (match_dup 2)))]) + +(define_insn "*minsf_sse" + [(set (match_operand:SF 0 
"register_operand" "=x") + (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (match_dup 2)))] + "TARGET_SSE && reload_completed" + "minss\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +(define_expand "mindf3" + [(parallel [ + (set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "nonimmediate_operand" "")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "TARGET_SSE2 && TARGET_SSE_MATH" + "#") -(define_insn "" - [(set (match_operand:SF 0 "register_operand" "=f,f") - (if_then_else:SF (match_operator 1 "comparison_operator" - [(cc0) (const_int 0)]) - (match_operand:SF 2 "register_operand" "f,0") - (match_operand:SF 3 "register_operand" "0,f")))] - "TARGET_CMOVE && reload_completed" - "* return output_fp_conditional_move (which_alternative, operands);") +(define_insn "*mindf" + [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y,f#Y") + (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0,0,f#Y") + (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y,0")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_SSE2 && TARGET_IEEE_FP && TARGET_SSE_MATH" + "#") -(define_expand "movdfcc" - [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operand 1 "comparison_operator" "") - (match_operand:DF 2 "register_operand" "") - (match_operand:DF 3 "register_operand" "")))] - "TARGET_CMOVE" - " -{ - rtx temp; +(define_insn "*mindf_nonieee" + [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y") + (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "%0,0") + (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP" + "#") - if (GET_MODE_CLASS (GET_MODE (i386_compare_op0)) != MODE_INT) - FAIL; 
+(define_split + [(set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "nonimmediate_operand" "")) + (match_operand:DF 3 "register_operand" "") + (match_operand:DF 4 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))] + "SSE_REG_P (operands[0]) && reload_completed + && ((operands_match_p (operands[1], operands[3]) + && operands_match_p (operands[2], operands[4])) + || (operands_match_p (operands[1], operands[4]) + && operands_match_p (operands[2], operands[3])))" + [(set (match_dup 0) + (if_then_else:DF (lt (match_dup 1) + (match_dup 2)) + (match_dup 1) + (match_dup 2)))]) - /* The floating point conditional move instructions don't directly - support conditions resulting from a signed integer comparison. */ +;; We can't represent the LT test directly. Do this by swapping the operands: +;; compare operand 2 against operand 1 and select operand 1 when the result is GE. +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "register_operand" "")) + (match_operand:DF 3 "register_operand" "") + (match_operand:DF 4 "register_operand" ""))) + (clobber (reg:CC 17))] + "FP_REG_P (operands[0]) && reload_completed + && ((operands_match_p (operands[1], operands[3]) + && operands_match_p (operands[2], operands[4])) + || (operands_match_p (operands[1], operands[4]) + && operands_match_p (operands[2], operands[3])))" + [(set (reg:CCFP 17) + (compare:CCFP (match_dup 2) + (match_dup 1))) + (set (match_dup 0) + (if_then_else:DF (ge (reg:CCFP 17) (const_int 0)) + (match_dup 1) + (match_dup 2)))]) + +(define_insn "*mindf_sse" + [(set (match_operand:DF 0 "register_operand" "=Y") + (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "Ym")) + (match_dup 1) + (match_dup 2)))] + "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed" + "minsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "DF")]) + +(define_expand 
"maxsf3" + [(parallel [ + (set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "nonimmediate_operand" "")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "TARGET_SSE" + "#") - switch (GET_CODE (operands[1])) - { - case LT: - case LE: - case GE: - case GT: - temp = emit_store_flag (gen_reg_rtx (QImode), - GET_CODE (operands[1]), i386_compare_op0, i386_compare_op1, - VOIDmode, 0, 0); +(define_insn "*maxsf" + [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x") + (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0,0,f#x") + (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x,0")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_SSE && TARGET_IEEE_FP" + "#") - if (!temp) - FAIL; +(define_insn "*maxsf_nonieee" + [(set (match_operand:SF 0 "register_operand" "=x#f,f#x") + (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "%0,0") + (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_SSE && !TARGET_IEEE_FP" + "#") - operands[1] = gen_rtx_fmt_ee (NE, QImode, temp, const0_rtx); - break; +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "nonimmediate_operand" "")) + (match_operand:SF 3 "register_operand" "") + (match_operand:SF 4 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))] + "SSE_REG_P (operands[0]) && reload_completed + && ((operands_match_p (operands[1], operands[3]) + && operands_match_p (operands[2], operands[4])) + || (operands_match_p (operands[1], operands[4]) + && operands_match_p (operands[2], operands[3])))" + [(set (match_dup 0) + (if_then_else:SF (gt (match_dup 1) + (match_dup 2)) + (match_dup 1) + (match_dup 2)))]) - default: - operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), - GET_MODE (i386_compare_op0), - i386_compare_op0, 
i386_compare_op1); - break; - } -}") +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "register_operand" "")) + (match_operand:SF 3 "register_operand" "") + (match_operand:SF 4 "register_operand" ""))) + (clobber (reg:CC 17))] + "FP_REG_P (operands[0]) && reload_completed + && ((operands_match_p (operands[1], operands[3]) + && operands_match_p (operands[2], operands[4])) + || (operands_match_p (operands[1], operands[4]) + && operands_match_p (operands[2], operands[3])))" + [(set (reg:CCFP 17) + (compare:CCFP (match_dup 1) + (match_dup 2))) + (set (match_dup 0) + (if_then_else:SF (gt (reg:CCFP 17) (const_int 0)) + (match_dup 1) + (match_dup 2)))]) + +(define_insn "*maxsf_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (match_dup 2)))] + "TARGET_SSE && reload_completed" + "maxss\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "SF")]) + +(define_expand "maxdf3" + [(parallel [ + (set (match_operand:DF 0 "register_operand" "") + (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "nonimmediate_operand" "")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))])] + "TARGET_SSE2 && TARGET_SSE_MATH" + "#") -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f,f,f,f") - (if_then_else:DF (match_operator 1 "comparison_operator" - [(match_operand:QI 2 "nonimmediate_operand" "q,m,q,m") - (match_operand:QI 3 "general_operand" "qmn,qn,qmn,qn")]) - (match_operand:DF 4 "register_operand" "f,f,0,0") - (match_operand:DF 5 "register_operand" "0,0,f,f")))] - "TARGET_CMOVE - && GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != LE - && GET_CODE (operands[1]) != GE && GET_CODE (operands[1]) != GT" +(define_insn "*maxdf" + [(set (match_operand:DF 0 "register_operand" 
"=Y#f,f#Y,f#Y") + (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0,0,f#Y") + (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y,0")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_IEEE_FP" "#") -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f,f,f,f") - (if_then_else:DF (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "r,m,r,m") - (match_operand 3 "general_operand" "rmi,ri,rmi,ri")]) - (match_operand:DF 4 "register_operand" "f,f,0,0") - (match_operand:DF 5 "register_operand" "0,0,f,f")))] - "TARGET_CMOVE && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT - && GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != LE - && GET_CODE (operands[1]) != GE && GET_CODE (operands[1]) != GT" +(define_insn "*maxdf_nonieee" + [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y") + (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "%0,0") + (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y")) + (match_dup 1) + (match_dup 2))) + (clobber (reg:CC 17))] + "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP" "#") (define_split [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (const_int 0)]) - (match_operand:DF 3 "register_operand" "") - (match_operand:DF 4 "register_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) - (match_dup 2)) - (set (match_dup 0) - (if_then_else:DF (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 3) (match_dup 4)))] - "") + (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "nonimmediate_operand" "")) + (match_operand:DF 3 "register_operand" "") + (match_operand:DF 4 "nonimmediate_operand" ""))) + (clobber (reg:CC 17))] + "SSE_REG_P (operands[0]) && reload_completed + && ((operands_match_p (operands[1], operands[3]) + && operands_match_p (operands[2], 
operands[4])) + || (operands_match_p (operands[1], operands[4]) + && operands_match_p (operands[2], operands[3])))" + [(set (match_dup 0) + (if_then_else:DF (gt (match_dup 1) + (match_dup 2)) + (match_dup 1) + (match_dup 2)))]) (define_split [(set (match_operand:DF 0 "register_operand" "") - (if_then_else:DF (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (match_operand 3 "general_operand" "")]) - (match_operand:DF 4 "register_operand" "") - (match_operand:DF 5 "register_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) (compare (match_dup 2) (match_dup 3))) + (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "register_operand" "")) + (match_operand:DF 3 "register_operand" "") + (match_operand:DF 4 "register_operand" ""))) + (clobber (reg:CC 17))] + "FP_REG_P (operands[0]) && reload_completed + && ((operands_match_p (operands[1], operands[3]) + && operands_match_p (operands[2], operands[4])) + || (operands_match_p (operands[1], operands[4]) + && operands_match_p (operands[2], operands[3])))" + [(set (reg:CCFP 17) + (compare:CCFP (match_dup 1) + (match_dup 2))) (set (match_dup 0) - (if_then_else:DF (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 4) (match_dup 5)))] - "") - -(define_insn "" - [(set (match_operand:DF 0 "register_operand" "=f,f") - (if_then_else:DF (match_operator 1 "comparison_operator" - [(cc0) (const_int 0)]) - (match_operand:DF 2 "register_operand" "f,0") - (match_operand:DF 3 "register_operand" "0,f")))] - "TARGET_CMOVE && reload_completed" - "* return output_fp_conditional_move (which_alternative, operands);") + (if_then_else:DF (gt (reg:CCFP 17) (const_int 0)) + (match_dup 1) + (match_dup 2)))]) + +(define_insn "*maxdf_sse" + [(set (match_operand:DF 0 "register_operand" "=Y") + (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "Ym")) + (match_dup 1) + (match_dup 2)))] + "TARGET_SSE2 && 
TARGET_SSE_MATH && reload_completed" + "maxsd\t{%2, %0|%0, %2}" + [(set_attr "type" "sse") + (set_attr "mode" "DF")]) + +;; Misc patterns (?) -(define_expand "movxfcc" - [(set (match_operand:XF 0 "register_operand" "") - (if_then_else:XF (match_operand 1 "comparison_operator" "") - (match_operand:XF 2 "register_operand" "") - (match_operand:XF 3 "register_operand" "")))] - "TARGET_CMOVE" - " +;; This pattern exists to put a dependency on all ebp-based memory accesses. +;; Otherwise there will be nothing to keep +;; +;; [(set (reg ebp) (reg esp))] +;; [(set (reg esp) (plus (reg esp) (const_int -160000))) +;; (clobber (eflags)] +;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))] +;; +;; in proper program order. +(define_expand "pro_epilogue_adjust_stack" + [(parallel [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_operand:SI 1 "register_operand" "0,r") + (match_operand:SI 2 "immediate_operand" "i,i"))) + (clobber (reg:CC 17)) + (clobber (mem:BLK (scratch)))])] + "" { - rtx temp; + if (TARGET_64BIT) + { + emit_insn (gen_pro_epilogue_adjust_stack_rex64 + (operands[0], operands[1], operands[2])); + DONE; + } +}) - if (GET_MODE_CLASS (GET_MODE (i386_compare_op0)) != MODE_INT) - FAIL; +(define_insn "*pro_epilogue_adjust_stack_1" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_operand:SI 1 "register_operand" "0,r") + (match_operand:SI 2 "immediate_operand" "i,i"))) + (clobber (reg:CC 17)) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOV: + return "mov{l}\t{%1, %0|%0, %1}"; + + case TYPE_ALU: + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; - /* The floating point conditional move instructions don't directly - support conditions 
resulting from a signed integer comparison. */ + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{l}\t{%a2, %0|%0, %a2}"; - switch (GET_CODE (operands[1])) + default: + abort (); + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "0") + (const_string "alu") + (match_operand:SI 2 "const0_operand" "") + (const_string "imov") + ] + (const_string "lea"))) + (set_attr "mode" "SI")]) + +(define_insn "pro_epilogue_adjust_stack_rex64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "0,r") + (match_operand:DI 2 "x86_64_immediate_operand" "e,e"))) + (clobber (reg:CC 17)) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) { - case LT: - case LE: - case GE: - case GT: - temp = emit_store_flag (gen_reg_rtx (QImode), - GET_CODE (operands[1]), i386_compare_op0, i386_compare_op1, - VOIDmode, 0, 0); - - if (!temp) - FAIL; + case TYPE_IMOV: + return "mov{q}\t{%1, %0|%0, %1}"; + + case TYPE_ALU: + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; - operands[1] = gen_rtx_fmt_ee (NE, QImode, temp, const0_rtx); - break; + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{q}\t{%a2, %0|%0, %a2}"; default: - operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), - GET_MODE (i386_compare_op0), - i386_compare_op0, i386_compare_op1); - break; + abort (); } -}") +} + [(set (attr "type") + (cond [(eq_attr "alternative" "0") + (const_string "alu") + (match_operand:DI 2 "const0_operand" "") + (const_string "imov") + ] + (const_string "lea"))) + (set_attr "mode" "DI")]) + + +;; Placeholder for the conditional moves. This one is split either to SSE +;; based moves emulation or to usual cmove sequence. 
Little bit unfortunate +;; fact is that compares supported by the cmp??ss instructions are exactly +;; swapped of those supported by cmove sequence. +;; The EQ/NE comparisons also needs bit care, since they are not directly +;; supported by i387 comparisons and we do need to emit two conditional moves +;; in tandem. + +(define_insn "sse_movsfcc" + [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?f#xr,?f#xr,?r#xf,?r#xf,?r#xf,?r#xf") + (if_then_else:SF (match_operator 1 "sse_comparison_operator" + [(match_operand:SF 4 "nonimmediate_operand" "0#fx,x#fx,f#x,f#x,xm#f,xm#f,f#x,f#x,xm#f,xm#f") + (match_operand:SF 5 "nonimmediate_operand" "xm#f,xm#f,f#x,f#x,x#f,x#f,f#x,f#x,x#f,x#f")]) + (match_operand:SF 2 "nonimmediate_operand" "x#fr,0#fr,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx,0#rx") + (match_operand:SF 3 "nonimmediate_operand" "x#fr,x#fr,0#fx,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx"))) + (clobber (match_scratch:SF 6 "=2,&4,X,X,X,X,X,X,X,X")) + (clobber (reg:CC 17))] + "TARGET_SSE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + && (!TARGET_IEEE_FP + || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" + "#") -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f,f,f,f") - (if_then_else:XF (match_operator 1 "comparison_operator" - [(match_operand:QI 2 "nonimmediate_operand" "q,m,q,m") - (match_operand:QI 3 "general_operand" "qmn,qn,qmn,qn")]) - (match_operand:XF 4 "register_operand" "f,f,0,0") - (match_operand:XF 5 "register_operand" "0,0,f,f")))] - "TARGET_CMOVE - && GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != LE - && GET_CODE (operands[1]) != GE && GET_CODE (operands[1]) != GT" +(define_insn "sse_movsfcc_eq" + [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?r#xf,?r#xf") + (if_then_else:SF (eq (match_operand:SF 3 "nonimmediate_operand" "%0#fx,x#fx,f#x,xm#f,f#x,xm#f") + (match_operand:SF 4 "nonimmediate_operand" "xm#f,xm#f,f#x,x#f,f#x,x#f")) + (match_operand:SF 1 
"nonimmediate_operand" "x#fr,0#fr,0#fx,0#fx,0#rx,0#rx") + (match_operand:SF 2 "nonimmediate_operand" "x#fr,x#fr,f#fx,f#fx,rm#rx,rm#rx"))) + (clobber (match_scratch:SF 5 "=1,&3,X,X,X,X")) + (clobber (reg:CC 17))] + "TARGET_SSE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" "#") -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f,f,f,f") - (if_then_else:XF (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "r,m,r,m") - (match_operand 3 "general_operand" "rmi,ri,rmi,ri")]) - (match_operand:XF 4 "register_operand" "f,f,0,0") - (match_operand:XF 5 "register_operand" "0,0,f,f")))] - "TARGET_CMOVE && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT - && GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != LE - && GET_CODE (operands[1]) != GE && GET_CODE (operands[1]) != GT" +(define_insn "sse_movdfcc" + [(set (match_operand:DF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?f#xr,?f#xr,?r#xf,?r#xf,?r#xf,?r#xf") + (if_then_else:DF (match_operator 1 "sse_comparison_operator" + [(match_operand:DF 4 "nonimmediate_operand" "0#fx,x#fx,f#x,f#x,xm#f,xm#f,f#x,f#x,xm#f,xm#f") + (match_operand:DF 5 "nonimmediate_operand" "xm#f,xm#f,f#x,f#x,x#f,x#f,f#x,f#x,x#f,x#f")]) + (match_operand:DF 2 "nonimmediate_operand" "x#fr,0#fr,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx,0#rx") + (match_operand:DF 3 "nonimmediate_operand" "x#fr,x#fr,0#fx,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx"))) + (clobber (match_scratch:DF 6 "=2,&4,X,X,X,X,X,X,X,X")) + (clobber (reg:CC 17))] + "TARGET_SSE2 + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + && (!TARGET_IEEE_FP + || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" "#") -(define_split - [(set (match_operand:XF 0 "register_operand" "") - (if_then_else:XF (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (const_int 0)]) - (match_operand:XF 3 "register_operand" "") - (match_operand:XF 4 "register_operand" "")))] - 
"TARGET_CMOVE && reload_completed" - [(set (cc0) - (match_dup 2)) - (set (match_dup 0) - (if_then_else:XF (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 3) (match_dup 4)))] - "") +(define_insn "sse_movdfcc_eq" + [(set (match_operand:DF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?r#xf,?r#xf") + (if_then_else:DF (eq (match_operand:DF 3 "nonimmediate_operand" "%0#fx,x#fx,f#x,xm#f,f#x,xm#f") + (match_operand:DF 4 "nonimmediate_operand" "xm#f,xm#f,f#x,x#f,f#x,x#f")) + (match_operand:DF 1 "nonimmediate_operand" "x#fr,0#fr,0#fx,0#fx,0#rx,0#rx") + (match_operand:DF 2 "nonimmediate_operand" "x#fr,x#fr,f#fx,f#fx,rm#rx,rm#rx"))) + (clobber (match_scratch:DF 5 "=1,&3,X,X,X,X")) + (clobber (reg:CC 17))] + "TARGET_SSE + && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" + "#") +;; For non-sse moves just expand the usual cmove sequence. (define_split - [(set (match_operand:XF 0 "register_operand" "") - (if_then_else:XF (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (match_operand 3 "general_operand" "")]) - (match_operand:XF 4 "register_operand" "") - (match_operand:XF 5 "register_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) (compare (match_dup 2) (match_dup 3))) - (set (match_dup 0) - (if_then_else:XF (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 4) (match_dup 5)))] - "") + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "comparison_operator" + [(match_operand 4 "nonimmediate_operand" "") + (match_operand 5 "register_operand" "")]) + (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "nonimmediate_operand" ""))) + (clobber (match_operand 6 "" "")) + (clobber (reg:CC 17))] + "!SSE_REG_P (operands[0]) && reload_completed + && VALID_SSE_REG_MODE (GET_MODE (operands[0]))" + [(const_int 0)] +{ + ix86_compare_op0 = operands[5]; + ix86_compare_op1 = operands[4]; + operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])), + VOIDmode, 
operands[5], operands[4]); + ix86_expand_fp_movcc (operands); + DONE; +}) + +;; Split SSE based conditional move into sequence: +;; cmpCC op0, op4 - set op0 to 0 or ffffffff depending on the comparison +;; and op2, op0 - zero op2 if comparison was false +;; nand op0, op3 - load op3 to op0 if comparison was false +;; or op2, op0 - get the non-zero one into the result. +(define_split + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "sse_comparison_operator" + [(match_operand 4 "register_operand" "") + (match_operand 5 "nonimmediate_operand" "")]) + (match_operand 2 "register_operand" "") + (match_operand 3 "register_operand" ""))) + (clobber (match_operand 6 "" "")) + (clobber (reg:CC 17))] + "SSE_REG_P (operands[0]) && reload_completed" + [(set (match_dup 4) (match_op_dup 1 [(match_dup 4) (match_dup 5)])) + (set (subreg:TI (match_dup 2) 0) (and:TI (subreg:TI (match_dup 2) 0) + (subreg:TI (match_dup 4) 0))) + (set (subreg:TI (match_dup 4) 0) (and:TI (not:TI (subreg:TI (match_dup 4) 0)) + (subreg:TI (match_dup 3) 0))) + (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0) + (subreg:TI (match_dup 7) 0)))] +{ + PUT_MODE (operands[1], GET_MODE (operands[0])); + if (operands_match_p (operands[0], operands[4])) + operands[6] = operands[4], operands[7] = operands[2]; + else + operands[6] = operands[2], operands[7] = operands[4]; +}) + +;; Special case of conditional move we can handle effectively. +;; Do not bother with the integer/floating point case, since these are +;; both considerably slower, unlike in the generic case. 
+(define_insn "*sse_movsfcc_const0_1" + [(set (match_operand:SF 0 "register_operand" "=x") + (if_then_else:SF (match_operator 1 "sse_comparison_operator" + [(match_operand:SF 4 "register_operand" "0") + (match_operand:SF 5 "nonimmediate_operand" "xm")]) + (match_operand:SF 2 "register_operand" "x") + (match_operand:SF 3 "const0_operand" "X")))] + "TARGET_SSE" + "#") -(define_insn "" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (if_then_else:XF (match_operator 1 "comparison_operator" - [(cc0) (const_int 0)]) - (match_operand:XF 2 "register_operand" "f,0") - (match_operand:XF 3 "register_operand" "0,f")))] - "TARGET_CMOVE && reload_completed" - "* return output_fp_conditional_move (which_alternative, operands);") +(define_insn "*sse_movsfcc_const0_2" + [(set (match_operand:SF 0 "register_operand" "=x") + (if_then_else:SF (match_operator 1 "sse_comparison_operator" + [(match_operand:SF 4 "register_operand" "0") + (match_operand:SF 5 "nonimmediate_operand" "xm")]) + (match_operand:SF 2 "const0_operand" "X") + (match_operand:SF 3 "register_operand" "x")))] + "TARGET_SSE" + "#") -(define_expand "movdicc" - [(set (match_operand:DI 0 "register_operand" "") - (if_then_else:DI (match_operand 1 "comparison_operator" "") - (match_operand:DI 2 "nonimmediate_operand" "") - (match_operand:DI 3 "nonimmediate_operand" "")))] - "TARGET_CMOVE" - " -{ - if (GET_MODE_CLASS (GET_MODE (i386_compare_op0)) != MODE_INT) - FAIL; +(define_insn "*sse_movsfcc_const0_3" + [(set (match_operand:SF 0 "register_operand" "=x") + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" + [(match_operand:SF 4 "nonimmediate_operand" "xm") + (match_operand:SF 5 "register_operand" "0")]) + (match_operand:SF 2 "register_operand" "x") + (match_operand:SF 3 "const0_operand" "X")))] + "TARGET_SSE" + "#") - operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), - GET_MODE (i386_compare_op0), - i386_compare_op0, i386_compare_op1); -}") +(define_insn "*sse_movsfcc_const0_4" + [(set 
(match_operand:SF 0 "register_operand" "=x") + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" + [(match_operand:SF 4 "nonimmediate_operand" "xm") + (match_operand:SF 5 "register_operand" "0")]) + (match_operand:SF 2 "const0_operand" "X") + (match_operand:SF 3 "register_operand" "x")))] + "TARGET_SSE" + "#") -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=&r,&r,&r,&r") - (if_then_else:DI (match_operator 1 "comparison_operator" - [(match_operand:QI 2 "nonimmediate_operand" "q,m,q,m") - (match_operand:QI 3 "general_operand" "qmn,qn,qmn,qn")]) - (match_operand:DI 4 "nonimmediate_operand" "ro,ro,0,0") - (match_operand:DI 5 "nonimmediate_operand" "0,0,ro,ro")))] - "TARGET_CMOVE" +(define_insn "*sse_movdfcc_const0_1" + [(set (match_operand:SF 0 "register_operand" "=x") + (if_then_else:SF (match_operator 1 "sse_comparison_operator" + [(match_operand:SF 4 "register_operand" "0") + (match_operand:SF 5 "nonimmediate_operand" "xm")]) + (match_operand:SF 2 "register_operand" "x") + (match_operand:SF 3 "const0_operand" "X")))] + "TARGET_SSE2" "#") -(define_insn "" - [(set (match_operand:DI 0 "register_operand" "=&r,&r,&r,&r") - (if_then_else:DI (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "r,m,r,m") - (match_operand 3 "general_operand" "rmi,ri,rmi,ri")]) - (match_operand:DI 4 "nonimmediate_operand" "ro,ro,0,0") - (match_operand:DI 5 "nonimmediate_operand" "0,0,ro,ro")))] - "TARGET_CMOVE && GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT" +(define_insn "*sse_movdfcc_const0_2" + [(set (match_operand:SF 0 "register_operand" "=x") + (if_then_else:SF (match_operator 1 "sse_comparison_operator" + [(match_operand:SF 4 "register_operand" "0") + (match_operand:SF 5 "nonimmediate_operand" "xm")]) + (match_operand:SF 2 "const0_operand" "X") + (match_operand:SF 3 "register_operand" "x")))] + "TARGET_SSE2" "#") -(define_split - [(set (match_operand:DI 0 "register_operand" "") - (if_then_else:DI (match_operator 1 
"comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (const_int 0)]) - (match_operand:DI 3 "nonimmediate_operand" "") - (match_operand:DI 4 "nonimmediate_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) - (match_dup 2)) - (set (match_dup 5) - (if_then_else:SI (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 7) (match_dup 9))) - (set (match_dup 6) - (if_then_else:SI (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 8) (match_dup 10)))] - "split_di (&operands[0], 1, &operands[5], &operands[6]); - split_di (&operands[3], 1, &operands[7], &operands[8]); - split_di (&operands[4], 1, &operands[9], &operands[10]);") +(define_insn "*sse_movdfcc_const0_3" + [(set (match_operand:SF 0 "register_operand" "=x") + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" + [(match_operand:SF 4 "nonimmediate_operand" "xm") + (match_operand:SF 5 "register_operand" "0")]) + (match_operand:SF 2 "register_operand" "x") + (match_operand:SF 3 "const0_operand" "X")))] + "TARGET_SSE2" + "#") + +(define_insn "*sse_movdfcc_const0_4" + [(set (match_operand:SF 0 "register_operand" "=x") + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" + [(match_operand:SF 4 "nonimmediate_operand" "xm") + (match_operand:SF 5 "register_operand" "0")]) + (match_operand:SF 2 "const0_operand" "X") + (match_operand:SF 3 "register_operand" "x")))] + "TARGET_SSE2" + "#") (define_split - [(set (match_operand:DI 0 "register_operand" "") - (if_then_else:DI (match_operator 1 "comparison_operator" - [(match_operand 2 "nonimmediate_operand" "") - (match_operand 3 "general_operand" "")]) - (match_operand:DI 4 "nonimmediate_operand" "") - (match_operand:DI 5 "nonimmediate_operand" "")))] - "TARGET_CMOVE && reload_completed" - [(set (cc0) (compare (match_dup 2) (match_dup 3))) - (set (match_dup 6) - (if_then_else:SI (match_op_dup 1 [(cc0) (const_int 0)]) - (match_dup 8) (match_dup 10))) - (set (match_dup 7) - (if_then_else:SI (match_op_dup 1 [(cc0) (const_int 0)]) 
- (match_dup 9) (match_dup 11)))] - "split_di (&operands[0], 1, &operands[6], &operands[7]); - split_di (&operands[4], 1, &operands[8], &operands[9]); - split_di (&operands[5], 1, &operands[10], &operands[11]);") - -(define_insn "strlensi_unroll" - [(set (match_operand:SI 0 "register_operand" "=&r,&r") - (unspec:SI [(mem:BLK (match_operand:SI 1 "address_operand" "r,r")) - (match_operand:SI 2 "immediate_operand" "i,i")] 0)) - (clobber (match_scratch:SI 3 "=&q,&r"))] - "optimize > 1" - "* return output_strlen_unroll (operands);") - -;; the only difference between the following patterns is the register preference -;; on a pentium using a q-register saves one clock cycle per 4 characters - -(define_insn "strlensi_unroll4" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (unspec:SI [(mem:BLK (match_operand:SI 3 "register_operand" "0,0")) - (match_operand:SI 1 "immediate_operand" "i,i") - (match_operand:SI 2 "register_operand" "+q,!r")] 0)) - (clobber (match_dup 2))] - "(TARGET_USE_ANY_REG && optimize > 1)" - "* return output_strlen_unroll (operands);") - -(define_insn "strlensi_unroll5" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(mem:BLK (match_operand:SI 3 "register_operand" "0")) - (match_operand:SI 1 "immediate_operand" "i") - (match_operand:SI 2 "register_operand" "+q")] 0)) - (clobber (match_dup 2))] - "(TARGET_USE_Q_REG && optimize > 1)" - "* return output_strlen_unroll (operands);" -) - -(define_insn "allocate_stack_worker" + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "comparison_operator" + [(match_operand 4 "register_operand" "") + (match_operand 5 "nonimmediate_operand" "")]) + (match_operand 2 "nonmemory_operand" "") + (match_operand 3 "nonmemory_operand" "")))] + "SSE_REG_P (operands[0]) && reload_completed + && (const0_operand (operands[2], GET_MODE (operands[0])) + || const0_operand (operands[3], GET_MODE (operands[0])))" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 
5)])) + (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6) + (subreg:TI (match_dup 7) 0)))] +{ + PUT_MODE (operands[1], GET_MODE (operands[0])); + if (!sse_comparison_operator (operands[1], VOIDmode)) + { + rtx tmp = operands[5]; + operands[5] = operands[4]; + operands[4] = tmp; + PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1]))); + } + if (const0_operand (operands[2], GET_MODE (operands[0]))) + { + operands[7] = operands[3]; + operands[6] = gen_rtx_NOT (TImode, gen_rtx_SUBREG (TImode, operands[0], + 0)); + } + else + { + operands[7] = operands[2]; + operands[6] = gen_rtx_SUBREG (TImode, operands[0], 0); + } +}) + +(define_expand "allocate_stack_worker" + [(match_operand:SI 0 "register_operand" "")] + "TARGET_STACK_PROBE" +{ + if (TARGET_64BIT) + emit_insn (gen_allocate_stack_worker_rex64 (operands[0])); + else + emit_insn (gen_allocate_stack_worker_1 (operands[0])); + DONE; +}) + +(define_insn "allocate_stack_worker_1" [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3) (set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 0))) - (clobber (match_dup 0))] - "TARGET_STACK_PROBE" - "* return AS1(call,__alloca);" - [(set_attr "memory" "none")]) + (clobber (match_dup 0)) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_STACK_PROBE" + "call\t__alloca" + [(set_attr "type" "multi") + (set_attr "length" "5")]) + +(define_insn "allocate_stack_worker_rex64" + [(unspec:DI [(match_operand:DI 0 "register_operand" "a")] 3) + (set (reg:DI 7) (minus:DI (reg:DI 7) (match_dup 0))) + (clobber (match_dup 0)) + (clobber (reg:CC 17))] + "TARGET_64BIT && TARGET_STACK_PROBE" + "call\t__alloca" + [(set_attr "type" "multi") + (set_attr "length" "5")]) (define_expand "allocate_stack" - [(set (match_operand:SI 0 "register_operand" "=r") - (minus:SI (reg:SI 7) (match_operand:SI 1 "general_operand" ""))) - (set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 1)))] - "TARGET_STACK_PROBE" - " + [(parallel [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (reg:SI 7) 
+ (match_operand:SI 1 "general_operand" ""))) + (clobber (reg:CC 17))]) + (parallel [(set (reg:SI 7) + (minus:SI (reg:SI 7) (match_dup 1))) + (clobber (reg:CC 17))])] + "TARGET_STACK_PROBE" { #ifdef CHECK_STACK_LIMIT if (GET_CODE (operands[1]) == CONST_INT @@ -8186,22 +16662,2996 @@ byte_xor_operation: emit_move_insn (operands[0], virtual_stack_dynamic_rtx); DONE; -}") +}) -(define_expand "exception_receiver" - [(const_int 0)] - "flag_pic" - " +(define_expand "builtin_setjmp_receiver" + [(label_ref (match_operand 0 "" ""))] + "!TARGET_64BIT && flag_pic" { - load_pic_register (1); + load_pic_register (); DONE; -}") +}) + +;; Avoid redundant prefixes by splitting HImode arithmetic to SImode. -(define_expand "builtin_setjmp_receiver" - [(label_ref (match_operand 0 "" ""))] - "flag_pic" - " +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "promotable_binary_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "aligned_operand" "")])) + (clobber (reg:CC 17))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && ((GET_MODE (operands[0]) == HImode + && (!optimize_size || GET_CODE (operands[2]) != CONST_INT + || CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))) + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + if (GET_CODE (operands[3]) != ASHIFT) + operands[2] = gen_lowpart (SImode, operands[2]); + PUT_MODE (operands[3], SImode);") + +(define_split + [(set (reg 17) + (compare (and (match_operand 1 "aligned_operand" "") + (match_operand 2 "const_int_operand" "")) + (const_int 0))) + (set (match_operand 0 "register_operand" "") + (and (match_dup 1) (match_dup 2)))] + "! 
TARGET_PARTIAL_REG_STALL && reload_completed + && ix86_match_ccmode (insn, CCNOmode) + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(parallel [(set (reg:CCNO 17) + (compare:CCNO (and:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) + (and:SI (match_dup 1) (match_dup 2)))])] + "operands[2] + = GEN_INT (trunc_int_for_mode (INTVAL (operands[2]) + & GET_MODE_MASK (GET_MODE (operands[0])), + SImode)); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]);") + +(define_split + [(set (reg 17) + (compare (and (match_operand 0 "aligned_operand" "") + (match_operand 1 "const_int_operand" "")) + (const_int 0)))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && ix86_match_ccmode (insn, CCNOmode) + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(set (reg:CCNO 17) + (compare:CCNO (and:SI (match_dup 0) (match_dup 1)) + (const_int 0)))] + "operands[1] + = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]) + & GET_MODE_MASK (GET_MODE (operands[0])), + SImode)); + operands[0] = gen_lowpart (SImode, operands[0]);") + +(define_split + [(set (match_operand 0 "register_operand" "") + (neg (match_operand 1 "register_operand" ""))) + (clobber (reg:CC 17))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(parallel [(set (match_dup 0) + (neg:SI (match_dup 1))) + (clobber (reg:CC 17))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]);") + +(define_split + [(set (match_operand 0 "register_operand" "") + (not (match_operand 1 "register_operand" "")))] + "! 
TARGET_PARTIAL_REG_STALL && reload_completed + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(set (match_dup 0) + (not:SI (match_dup 1)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]);") + +(define_split + [(set (match_operand 0 "register_operand" "") + (if_then_else (match_operator 1 "comparison_operator" + [(reg 17) (const_int 0)]) + (match_operand 2 "register_operand" "") + (match_operand 3 "register_operand" "")))] + "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE + && (GET_MODE (operands[0]) == HImode + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_size)))" + [(set (match_dup 0) + (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]);") + + +;; RTL Peephole optimizations, run before sched2. These primarily look to +;; transform a complex memory operation into two memory to register operations. + +;; Don't push memory operands +(define_peephole2 + [(set (match_operand:SI 0 "push_operand" "") + (match_operand:SI 1 "memory_operand" "")) + (match_scratch:SI 2 "r")] + "! optimize_size && ! TARGET_PUSH_MEMORY" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "memory_operand" "")) + (match_scratch:DI 2 "r")] + "! optimize_size && ! TARGET_PUSH_MEMORY" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; We need to handle SFmode only, because DFmode and XFmode is split to +;; SImode pushes. +(define_peephole2 + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "memory_operand" "")) + (match_scratch:SF 2 "r")] + "! optimize_size && ! 
TARGET_PUSH_MEMORY" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(set (match_operand:HI 0 "push_operand" "") + (match_operand:HI 1 "memory_operand" "")) + (match_scratch:HI 2 "r")] + "! optimize_size && ! TARGET_PUSH_MEMORY" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(set (match_operand:QI 0 "push_operand" "") + (match_operand:QI 1 "memory_operand" "")) + (match_scratch:QI 2 "q")] + "! optimize_size && ! TARGET_PUSH_MEMORY" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; Don't move an immediate directly to memory when the instruction +;; gets too big. +(define_peephole2 + [(match_scratch:SI 1 "r") + (set (match_operand:SI 0 "memory_operand" "") + (const_int 0))] + "! optimize_size + && ! TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 1) (const_int 0)) + (clobber (reg:CC 17))]) + (set (match_dup 0) (match_dup 1))] + "") + +(define_peephole2 + [(match_scratch:HI 1 "r") + (set (match_operand:HI 0 "memory_operand" "") + (const_int 0))] + "! optimize_size + && ! TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 2) (const_int 0)) + (clobber (reg:CC 17))]) + (set (match_dup 0) (match_dup 1))] + "operands[2] = gen_rtx_REG (SImode, true_regnum (operands[1]));") + +(define_peephole2 + [(match_scratch:QI 1 "q") + (set (match_operand:QI 0 "memory_operand" "") + (const_int 0))] + "! optimize_size + && ! 
TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cost->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 2) (const_int 0)) + (clobber (reg:CC 17))]) + (set (match_dup 0) (match_dup 1))] + "operands[2] = gen_rtx_REG (SImode, true_regnum (operands[1]));") + +(define_peephole2 + [(match_scratch:SI 2 "r") + (set (match_operand:SI 0 "memory_operand" "") + (match_operand:SI 1 "immediate_operand" ""))] + "! optimize_size + && get_attr_length (insn) >= ix86_cost->large_insn + && TARGET_SPLIT_LONG_MOVES" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(match_scratch:HI 2 "r") + (set (match_operand:HI 0 "memory_operand" "") + (match_operand:HI 1 "immediate_operand" ""))] + "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn + && TARGET_SPLIT_LONG_MOVES" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(match_scratch:QI 2 "q") + (set (match_operand:QI 0 "memory_operand" "") + (match_operand:QI 1 "immediate_operand" ""))] + "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn + && TARGET_SPLIT_LONG_MOVES" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; Don't compare memory with zero, load and use a test instead. +(define_peephole2 + [(set (reg 17) + (compare (match_operand:SI 0 "memory_operand" "") + (const_int 0))) + (match_scratch:SI 3 "r")] + "ix86_match_ccmode (insn, CCNOmode) && ! optimize_size" + [(set (match_dup 3) (match_dup 0)) + (set (reg:CCNO 17) (compare:CCNO (match_dup 3) (const_int 0)))] + "") + +;; NOT is not pairable on Pentium, while XOR is, but one byte longer. +;; Don't split NOTs with a displacement operand, because resulting XOR +;; will not be pairable anyway. +;; +;; On AMD K6, NOT is vector decoded with memory operand that can not be +;; represented using a modRM byte. 
The XOR replacement is long decoded, +;; so this split helps here as well. +;; +;; Note: Can't do this as a regular split because we can't get proper +;; lifetime information then. + +(define_peephole2 + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + "!optimize_size + && peep2_regno_dead_p (0, FLAGS_REG) + && ((TARGET_PENTIUM + && (GET_CODE (operands[0]) != MEM + || !memory_displacement_operand (operands[0], SImode))) + || (TARGET_K6 && long_memory_operand (operands[0], SImode)))" + [(parallel [(set (match_dup 0) + (xor:SI (match_dup 1) (const_int -1))) + (clobber (reg:CC 17))])] + "") + +(define_peephole2 + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] + "!optimize_size + && peep2_regno_dead_p (0, FLAGS_REG) + && ((TARGET_PENTIUM + && (GET_CODE (operands[0]) != MEM + || !memory_displacement_operand (operands[0], HImode))) + || (TARGET_K6 && long_memory_operand (operands[0], HImode)))" + [(parallel [(set (match_dup 0) + (xor:HI (match_dup 1) (const_int -1))) + (clobber (reg:CC 17))])] + "") + +(define_peephole2 + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] + "!optimize_size + && peep2_regno_dead_p (0, FLAGS_REG) + && ((TARGET_PENTIUM + && (GET_CODE (operands[0]) != MEM + || !memory_displacement_operand (operands[0], QImode))) + || (TARGET_K6 && long_memory_operand (operands[0], QImode)))" + [(parallel [(set (match_dup 0) + (xor:QI (match_dup 1) (const_int -1))) + (clobber (reg:CC 17))])] + "") + +;; Non pairable "test imm, reg" instructions can be translated to +;; "and imm, reg" if reg dies. The "and" form is also shorter (one +;; byte opcode instead of two, have a short form for byte operands), +;; so do it for other CPUs as well. Given that the value was dead, +;; this should not create any new dependencies. 
Pass on the sub-word +;; versions if we're concerned about partial register stalls. + +(define_peephole2 + [(set (reg 17) + (compare (and:SI (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" "")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && (true_regnum (operands[0]) != 0 + || CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K')) + && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + [(parallel + [(set (reg:CCNO 17) + (compare:CCNO (and:SI (match_dup 0) + (match_dup 1)) + (const_int 0))) + (set (match_dup 0) + (and:SI (match_dup 0) (match_dup 1)))])] + "") + +;; We don't need to handle HImode case, because it will be promoted to SImode +;; on ! TARGET_PARTIAL_REG_STALL + +(define_peephole2 + [(set (reg 17) + (compare (and:QI (match_operand:QI 0 "register_operand" "") + (match_operand:QI 1 "immediate_operand" "")) + (const_int 0)))] + "! TARGET_PARTIAL_REG_STALL + && ix86_match_ccmode (insn, CCNOmode) + && true_regnum (operands[0]) != 0 + && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + [(parallel + [(set (reg:CCNO 17) + (compare:CCNO (and:QI (match_dup 0) + (match_dup 1)) + (const_int 0))) + (set (match_dup 0) + (and:QI (match_dup 0) (match_dup 1)))])] + "") + +(define_peephole2 + [(set (reg 17) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "")) + (const_int 0)))] + "! 
TARGET_PARTIAL_REG_STALL + && ix86_match_ccmode (insn, CCNOmode) + && true_regnum (operands[0]) != 0 + && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + [(parallel [(set (reg:CCNO 17) + (compare:CCNO + (and:SI + (zero_extract:SI + (match_dup 0) + (const_int 8) + (const_int 8)) + (match_dup 1)) + (const_int 0))) + (set (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_dup 0) + (const_int 8) + (const_int 8)) + (match_dup 1)))])] + "") + +;; Don't do logical operations with memory inputs. +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_dup 0) + (match_operand:SI 1 "memory_operand" "")])) + (clobber (reg:CC 17))])] + "! optimize_size && ! TARGET_READ_MODIFY" + [(set (match_dup 2) (match_dup 1)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 2)])) + (clobber (reg:CC 17))])] + "") + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_operand:SI 1 "memory_operand" "") + (match_dup 0)])) + (clobber (reg:CC 17))])] + "! optimize_size && ! TARGET_READ_MODIFY" + [(set (match_dup 2) (match_dup 1)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2) (match_dup 0)])) + (clobber (reg:CC 17))])] + "") + +; Don't do logical operations with memory outputs +; +; These two don't make sense for PPro/PII -- we're expanding a 4-uop +; instruction into two 1-uop insns plus a 2-uop insn. That last has +; the same decoder scheduling characteristics as the original. + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "memory_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_dup 0) + (match_operand:SI 1 "nonmemory_operand" "")])) + (clobber (reg:CC 17))])] + "! optimize_size && ! 
TARGET_READ_MODIFY_WRITE" + [(set (match_dup 2) (match_dup 0)) + (parallel [(set (match_dup 2) + (match_op_dup 3 [(match_dup 2) (match_dup 1)])) + (clobber (reg:CC 17))]) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "memory_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_operand:SI 1 "nonmemory_operand" "") + (match_dup 0)])) + (clobber (reg:CC 17))])] + "! optimize_size && ! TARGET_READ_MODIFY_WRITE" + [(set (match_dup 2) (match_dup 0)) + (parallel [(set (match_dup 2) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC 17))]) + (set (match_dup 0) (match_dup 2))] + "") + +;; Attempt to always use XOR for zeroing registers. +(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (const_int 0))] + "(GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == SImode + || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) + && (! TARGET_USE_MOV0 || optimize_size) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (const_int 0)) + (clobber (reg:CC 17))])] + "operands[0] = gen_rtx_REG (GET_MODE (operands[0]) == DImode ? DImode : SImode, + true_regnum (operands[0]));") + +(define_peephole2 + [(set (strict_low_part (match_operand 0 "register_operand" "")) + (const_int 0))] + "(GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode) + && (! TARGET_USE_MOV0 || optimize_size) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0)) + (clobber (reg:CC 17))])]) + +;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg. 
+(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (const_int -1))] + "(GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == SImode + || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) + && (optimize_size || TARGET_PENTIUM) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (const_int -1)) + (clobber (reg:CC 17))])] + "operands[0] = gen_rtx_REG (GET_MODE (operands[0]) == DImode ? DImode : SImode, + true_regnum (operands[0]));") + +;; Attempt to convert simple leas to adds. These can be created by +;; move expanders. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_dup 0) + (match_operand:SI 1 "nonmemory_operand" "")))] + "peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1))) + (clobber (reg:CC 17))])] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonmemory_operand" "")) 0))] + "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])" + [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[2] = gen_lowpart (SImode, operands[2]);") + +(define_peephole2 + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_dup 0) + (match_operand:DI 1 "x86_64_general_operand" "")))] + "peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1))) + (clobber (reg:CC 17))])] + "") + +;; Convert multiplications by a power of two into shifts. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_dup 0) + (match_operand:SI 1 "const_int_operand" "")))] + "exact_log2 (INTVAL (operands[1])) >= 0 + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") + +(define_peephole2 + 
[(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_dup 0) + (match_operand:DI 1 "const_int_operand" "")))] + "exact_log2 (INTVAL (operands[1])) >= 0 + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") + +;; In this pattern the constant multiplier is operand 2; operand 1 is the +;; DImode register, so the power-of-two test must look at operand 2. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "const_int_operand" "")) 0))] + "exact_log2 (INTVAL (operands[2])) >= 0 + && REGNO (operands[0]) == REGNO (operands[1]) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC 17))])] + "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));") + +;; The ESP adjustments can be done by the push and pop instructions. Resulting +;; code is shorter, since push is only 1 byte, while add imm, %esp 3 bytes. On +;; many CPUs it is also faster, since special hardware to avoid esp +;; dependencies is present. + +;; While some of these conversions may be done using splitters, we use peepholes +;; in order to allow combine_stack_adjustments pass to see nonobfuscated RTL. + +;; Convert prologue esp subtractions to push. +;; We need register to push. In order to keep verify_flow_info happy we have +;; two choices +;; - use scratch and clobber it in order to avoid dependencies +;; - use already live register +;; We can't use the second way right now, since there is no reliable way how to +;; verify that given register is live. First choice will also most likely result in +;; fewer dependencies. On the place of esp adjustments it is very likely that +;; call clobbered registers are dead. We may want to use base pointer as an +;; alternative when no register is available later. 
+ +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) + (clobber (reg:CC 17)) + (clobber (mem:BLK (scratch)))])] + "optimize_size || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (clobber (mem:BLK (scratch)))])]) + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (clobber (reg:CC 17)) + (clobber (mem:BLK (scratch)))])] + "optimize_size || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (clobber (mem:BLK (scratch)))])]) + +;; Convert esp subtractions to push. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4))) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))]) + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (clobber (reg:CC 17))])] + "optimize_size || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0)) + (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))]) + +;; Convert epilogue deallocator to pop. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (clobber (reg:CC 17)) + (clobber (mem:BLK (scratch)))])] + "optimize_size || !TARGET_ADD_ESP_4" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7))) + (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4))) + (clobber (mem:BLK (scratch)))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. 
;; Epilogue form, 8 bytes, two distinct scratch registers: each pop loads
;; into its own register.  Only the first pop carries the memory clobber.
(define_peephole2
  [(match_scratch:SI 0 "r")
   (match_scratch:SI 1 "r")
   (parallel
     [(set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 8)))
      (clobber (reg:CC 17))
      (clobber (mem:BLK (scratch)))])]
  "optimize_size || !TARGET_ADD_ESP_8"
  [(parallel
     [(set (match_dup 0)
           (mem:SI (reg:SI 7)))
      (set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))
      (clobber (mem:BLK (scratch)))])
   (parallel
     [(set (match_dup 1)
           (mem:SI (reg:SI 7)))
      (set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))])]
  "")

;; Epilogue form, 8 bytes, a single scratch register: both pops target the
;; same register.  Used only when optimizing for size.
(define_peephole2
  [(match_scratch:SI 0 "r")
   (parallel
     [(set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 8)))
      (clobber (reg:CC 17))
      (clobber (mem:BLK (scratch)))])]
  "optimize_size"
  [(parallel
     [(set (match_dup 0)
           (mem:SI (reg:SI 7)))
      (set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))
      (clobber (mem:BLK (scratch)))])
   (parallel
     [(set (match_dup 0)
           (mem:SI (reg:SI 7)))
      (set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))])]
  "")

;; Convert esp additions to pop.
;; Plain form (no memory clobber), 4 bytes; applied unconditionally.
(define_peephole2
  [(match_scratch:SI 0 "r")
   (parallel
     [(set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))
      (clobber (reg:CC 17))])]
  ""
  [(parallel
     [(set (match_dup 0)
           (mem:SI (reg:SI 7)))
      (set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))])]
  "")

;; Two pops case is tricky, since pop causes dependency on destination register.
;; We use two registers if available.
;; Plain form, 8 bytes, two distinct scratch registers; applied
;; unconditionally.
(define_peephole2
  [(match_scratch:SI 0 "r")
   (match_scratch:SI 1 "r")
   (parallel
     [(set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 8)))
      (clobber (reg:CC 17))])]
  ""
  [(parallel
     [(set (match_dup 0)
           (mem:SI (reg:SI 7)))
      (set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))])
   (parallel
     [(set (match_dup 1)
           (mem:SI (reg:SI 7)))
      (set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))])]
  "")

;; Plain form, 8 bytes, one scratch register popped twice; only when
;; optimizing for size.
(define_peephole2
  [(match_scratch:SI 0 "r")
   (parallel
     [(set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 8)))
      (clobber (reg:CC 17))])]
  "optimize_size"
  [(parallel
     [(set (match_dup 0)
           (mem:SI (reg:SI 7)))
      (set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))])
   (parallel
     [(set (match_dup 0)
           (mem:SI (reg:SI 7)))
      (set (reg:SI 7)
           (plus:SI (reg:SI 7) (const_int 4)))])]
  "")

;; Convert compares with 1 to shorter inc/dec operations when CF is not
;; required and register dies.
;; Each variant rewrites the compare into a CCGCmode compare that also
;; clobbers the (dying) register operand.  SImode variant:
(define_peephole2
  [(set (reg 17)
        (compare (match_operand:SI 0 "register_operand" "")
                 (match_operand:SI 1 "incdec_operand" "")))]
  "ix86_match_ccmode (insn, CCGCmode)
   && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
  [(parallel
     [(set (reg:CCGC 17)
           (compare:CCGC (match_dup 0) (match_dup 1)))
      (clobber (match_dup 0))])]
  "")

;; HImode variant.
(define_peephole2
  [(set (reg 17)
        (compare (match_operand:HI 0 "register_operand" "")
                 (match_operand:HI 1 "incdec_operand" "")))]
  "ix86_match_ccmode (insn, CCGCmode)
   && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
  [(parallel
     [(set (reg:CCGC 17)
           (compare:CCGC (match_dup 0) (match_dup 1)))
      (clobber (match_dup 0))])]
  "")

;; QImode variant.
(define_peephole2
  [(set (reg 17)
        (compare (match_operand:QI 0 "register_operand" "")
                 (match_operand:QI 1 "incdec_operand" "")))]
  "ix86_match_ccmode (insn, CCGCmode)
   && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
  [(parallel
     [(set (reg:CCGC 17)
           (compare:CCGC (match_dup 0) (match_dup 1)))
      (clobber (match_dup 0))])]
  "")

;; Convert compares with 128 to
;; shorter add -128
;; As with the inc/dec case, the compare is rewritten into a CCGCmode
;; compare that clobbers the dying register.  SImode variant:
(define_peephole2
  [(set (reg 17)
        (compare (match_operand:SI 0 "register_operand" "")
                 (const_int 128)))]
  "ix86_match_ccmode (insn, CCGCmode)
   && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
  [(parallel
     [(set (reg:CCGC 17)
           (compare:CCGC (match_dup 0) (const_int 128)))
      (clobber (match_dup 0))])]
  "")

;; HImode variant.
(define_peephole2
  [(set (reg 17)
        (compare (match_operand:HI 0 "register_operand" "")
                 (const_int 128)))]
  "ix86_match_ccmode (insn, CCGCmode)
   && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
  [(parallel
     [(set (reg:CCGC 17)
           (compare:CCGC (match_dup 0) (const_int 128)))
      (clobber (match_dup 0))])]
  "")

;; DImode counterparts of the push conversions: an 8-byte allocation that
;; also clobbers memory becomes one push of a scratch register.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int -8)))
      (clobber (reg:CC 17))
      (clobber (mem:BLK (scratch)))])]
  "optimize_size || !TARGET_SUB_ESP_4"
  [(clobber (match_dup 0))
   (parallel
     [(set (mem:DI (pre_dec:DI (reg:DI 7)))
           (match_dup 0))
      (clobber (mem:BLK (scratch)))])])

;; 16-byte allocation with memory clobber: two pushes, the second one
;; carrying the clobber.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int -16)))
      (clobber (reg:CC 17))
      (clobber (mem:BLK (scratch)))])]
  "optimize_size || !TARGET_SUB_ESP_8"
  [(clobber (match_dup 0))
   (set (mem:DI (pre_dec:DI (reg:DI 7)))
        (match_dup 0))
   (parallel
     [(set (mem:DI (pre_dec:DI (reg:DI 7)))
           (match_dup 0))
      (clobber (mem:BLK (scratch)))])])

;; Convert esp subtractions to push.
;; DImode plain form (no memory clobber), 8 bytes: one push of a scratch
;; register.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int -8)))
      (clobber (reg:CC 17))])]
  "optimize_size || !TARGET_SUB_ESP_4"
  [(clobber (match_dup 0))
   (set (mem:DI (pre_dec:DI (reg:DI 7)))
        (match_dup 0))])

;; DImode plain form, 16 bytes: two pushes of the scratch register.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int -16)))
      (clobber (reg:CC 17))])]
  "optimize_size || !TARGET_SUB_ESP_8"
  [(clobber (match_dup 0))
   (set (mem:DI (pre_dec:DI (reg:DI 7)))
        (match_dup 0))
   (set (mem:DI (pre_dec:DI (reg:DI 7)))
        (match_dup 0))])

;; Convert epilogue deallocator to pop.
;; DImode, 8 bytes, with memory clobber: one pop into a scratch register.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))
      (clobber (reg:CC 17))
      (clobber (mem:BLK (scratch)))])]
  "optimize_size || !TARGET_ADD_ESP_4"
  [(parallel
     [(set (match_dup 0)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))
      (clobber (mem:BLK (scratch)))])]
  "")

;; Two pops case is tricky, since pop causes dependency on destination register.
;; We use two registers if available.
;; DImode epilogue form, 16 bytes, two distinct scratch registers.  Only
;; the first pop carries the memory clobber.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (match_scratch:DI 1 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 16)))
      (clobber (reg:CC 17))
      (clobber (mem:BLK (scratch)))])]
  "optimize_size || !TARGET_ADD_ESP_8"
  [(parallel
     [(set (match_dup 0)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))
      (clobber (mem:BLK (scratch)))])
   (parallel
     [(set (match_dup 1)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))])]
  "")

;; DImode epilogue form, 16 bytes, one scratch register popped twice; only
;; when optimizing for size.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 16)))
      (clobber (reg:CC 17))
      (clobber (mem:BLK (scratch)))])]
  "optimize_size"
  [(parallel
     [(set (match_dup 0)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))
      (clobber (mem:BLK (scratch)))])
   (parallel
     [(set (match_dup 0)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))])]
  "")

;; Convert esp additions to pop.
;; DImode plain form, 8 bytes; applied unconditionally.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))
      (clobber (reg:CC 17))])]
  ""
  [(parallel
     [(set (match_dup 0)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))])]
  "")

;; Two pops case is tricky, since pop causes dependency on destination register.
;; We use two registers if available.
;; DImode plain form, 16 bytes, two distinct scratch registers; applied
;; unconditionally.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (match_scratch:DI 1 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 16)))
      (clobber (reg:CC 17))])]
  ""
  [(parallel
     [(set (match_dup 0)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))])
   (parallel
     [(set (match_dup 1)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))])]
  "")

;; DImode plain form, 16 bytes, one scratch register popped twice; only
;; when optimizing for size.
(define_peephole2
  [(match_scratch:DI 0 "r")
   (parallel
     [(set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 16)))
      (clobber (reg:CC 17))])]
  "optimize_size"
  [(parallel
     [(set (match_dup 0)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))])
   (parallel
     [(set (match_dup 0)
           (mem:DI (reg:DI 7)))
      (set (reg:DI 7)
           (plus:DI (reg:DI 7) (const_int 8)))])]
  "")

;; Call-value patterns last so that the wildcard operand does not
;; disrupt insn-recog's switch tables.

;; Value-returning call to a constant address that also adjusts the stack
;; pointer by operand 3.  Sibling calls are emitted as jumps.
(define_insn "*call_value_pop_0"
  [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
              (match_operand:SI 2 "" "")))
   (set (reg:SI 7)
        (plus:SI (reg:SI 7)
                 (match_operand:SI 3 "immediate_operand" "")))]
  "!TARGET_64BIT"
{
  return SIBLING_CALL_P (insn) ? "jmp\t%P1" : "call\t%P1";
}
  [(set_attr "type" "callv")])

;; As above, but the callee may be any call_insn_operand.  Constant
;; addresses are printed with %P, everything else with %A.
(define_insn "*call_value_pop_1"
  [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
              (match_operand:SI 2 "" "")))
   (set (reg:SI 7)
        (plus:SI (reg:SI 7)
                 (match_operand:SI 3 "immediate_operand" "i")))]
  "!TARGET_64BIT"
{
  if (constant_call_address_operand (operands[1], QImode))
    return SIBLING_CALL_P (insn) ? "jmp\t%P1" : "call\t%P1";

  return SIBLING_CALL_P (insn) ? "jmp\t%A1" : "call\t%A1";
}
  [(set_attr "type" "callv")])

;; Value-returning call to a constant address, no stack adjustment,
;; 32-bit targets.
(define_insn "*call_value_0"
  [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
              (match_operand:SI 2 "" "")))]
  "!TARGET_64BIT"
{
  return SIBLING_CALL_P (insn) ? "jmp\t%P1" : "call\t%P1";
}
  [(set_attr "type" "callv")])

;; 64-bit counterpart of *call_value_0.
(define_insn "*call_value_0_rex64"
  [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
              (match_operand:DI 2 "const_int_operand" "")))]
  "TARGET_64BIT"
{
  return SIBLING_CALL_P (insn) ? "jmp\t%P1" : "call\t%P1";
}
  [(set_attr "type" "callv")])

;; Value-returning call through any call_insn_operand, 32-bit targets.
;; Non-constant callees are printed as %*%1.
(define_insn "*call_value_1"
  [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
              (match_operand:SI 2 "" "")))]
  "!TARGET_64BIT"
{
  if (constant_call_address_operand (operands[1], QImode))
    return SIBLING_CALL_P (insn) ? "jmp\t%P1" : "call\t%P1";

  return SIBLING_CALL_P (insn) ? "jmp\t%*%1" : "call\t%*%1";
}
  [(set_attr "type" "callv")])

;; 64-bit counterpart of *call_value_1; non-constant callees use %A.
(define_insn "*call_value_1_rex64"
  [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
              (match_operand:DI 2 "" "")))]
  "TARGET_64BIT"
{
  if (constant_call_address_operand (operands[1], QImode))
    return SIBLING_CALL_P (insn) ? "jmp\t%P1" : "call\t%P1";

  return SIBLING_CALL_P (insn) ? "jmp\t%A1" : "call\t%A1";
}
  [(set_attr "type" "callv")])

;; Unconditional trap, emitted as software interrupt 5.
(define_insn "trap"
  [(trap_if (const_int 1) (const_int 5))]
  ""
  "int\t$5")

;;; ix86 doesn't have conditional trap instructions, but we fake them
;;; for the sake of bounds checking.  By emitting bounds checks as
;;; conditional traps rather than as conditional jumps around
;;; unconditional traps we avoid introducing spurious basic-block
;;; boundaries and facilitate elimination of redundant checks.  In
;;; honor of the too-inflexible-for-BPs `bound' instruction, we use
;;; interrupt 5.
;;;
;;; FIXME: Static branch prediction rules for ix86 are such that
;;; forward conditional branches predict as untaken.
;;; As implemented
;;; below, pseudo conditional traps violate that rule.  We should use
;;; .pushsection/.popsection to place all of the `int 5's in a special
;;; section loaded at the end of the text segment and branch forward
;;; there on bounds-failure, and then jump back immediately (in case
;;; the system chooses to ignore bounds violations, or to report
;;; violations and continue execution).

;; Expand a conditional trap: build the flags comparison for the code of
;; operand 0 with ix86_expand_compare and emit a trap_if on it, with
;; operand 1 as the trap number.
(define_expand "conditional_trap"
  [(trap_if (match_operator 0 "comparison_operator"
             [(match_dup 2) (const_int 0)])
            (match_operand 1 "const_int_operand" ""))]
  ""
{
  emit_insn (gen_rtx_TRAP_IF (VOIDmode,
                              ix86_expand_compare (GET_CODE (operands[0]),
                                                   NULL, NULL),
                              operands[1]));
  DONE;
})

;; Output a conditional trap as a conditional jump (%c0) to a freshly
;; generated local label placed right after an `int' instruction.
(define_insn "*conditional_trap_1"
  [(trap_if (match_operator 0 "comparison_operator"
             [(reg 17) (const_int 0)])
            (match_operand 1 "const_int_operand" ""))]
  ""
{
  operands[2] = gen_label_rtx ();
  output_asm_insn ("j%c0\t%l2\; int\t%1", operands);
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
                             CODE_LABEL_NUMBER (operands[2]));
  RET;
})

        ;; Pentium III SIMD instructions.

;; Moves for SSE/MMX regs.

;; 128-bit vector moves between SSE registers and memory.
(define_insn "movv4sf_internal"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
  "TARGET_SSE"
  ;; @@@ let's try to use movaps here.
  "movaps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "movv4si_internal"
  [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
        (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
  "TARGET_SSE"
  ;; @@@ let's try to use movaps here.
  "movaps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; 64-bit vector moves between MMX registers and memory.
(define_insn "movv8qi_internal"
  [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
        (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
  "TARGET_MMX"
  "movq\t{%1, %0|%0, %1}"
  [(set_attr "type" "mmx")])

(define_insn "movv4hi_internal"
  [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
        (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
  "TARGET_MMX"
  "movq\t{%1, %0|%0, %1}"
  [(set_attr "type" "mmx")])

(define_insn "movv2si_internal"
  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
        (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
  "TARGET_MMX"
  "movq\t{%1, %0|%0, %1}"
  [(set_attr "type" "mmx")])

(define_insn "movv2sf_internal"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
        (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
  "TARGET_3DNOW"
  "movq\\t{%1, %0|%0, %1}"
  [(set_attr "type" "mmx")])

;; TImode moves: handled as an ordinary integer move on 64-bit targets and
;; as a vector move otherwise.
(define_expand "movti"
  [(set (match_operand:TI 0 "general_operand" "")
        (match_operand:TI 1 "general_operand" ""))]
  "TARGET_SSE || TARGET_64BIT"
{
  if (TARGET_64BIT)
    ix86_expand_move (TImode, operands);
  else
    ix86_expand_vector_move (TImode, operands);
  DONE;
})

;; Vector move expanders; all defer to ix86_expand_vector_move.
(define_expand "movv4sf"
  [(set (match_operand:V4SF 0 "general_operand" "")
        (match_operand:V4SF 1 "general_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V4SFmode, operands);
  DONE;
})

;; V4SImode moves are only implemented by movv4si_internal and *pushv4si,
;; both of which require TARGET_SSE, so the expander is enabled under the
;; same condition.
(define_expand "movv4si"
  [(set (match_operand:V4SI 0 "general_operand" "")
        (match_operand:V4SI 1 "general_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V4SImode, operands);
  DONE;
})

(define_expand "movv2si"
  [(set (match_operand:V2SI 0 "general_operand" "")
        (match_operand:V2SI 1 "general_operand" ""))]
  "TARGET_MMX"
{
  ix86_expand_vector_move (V2SImode, operands);
  DONE;
})

(define_expand "movv4hi"
  [(set (match_operand:V4HI 0 "general_operand" "")
        (match_operand:V4HI 1 "general_operand" ""))]
  "TARGET_MMX"
{
  ix86_expand_vector_move (V4HImode, operands);
  DONE;
})

(define_expand "movv8qi"
  [(set (match_operand:V8QI 0 "general_operand" "")
        (match_operand:V8QI 1 "general_operand" ""))]
  "TARGET_MMX"
{
  ix86_expand_vector_move (V8QImode, operands);
  DONE;
})

(define_expand "movv2sf"
  [(set (match_operand:V2SF 0 "general_operand" "")
        (match_operand:V2SF 1 "general_operand" ""))]
  "TARGET_3DNOW"
{
  ix86_expand_vector_move (V2SFmode, operands);
  DONE;
})

;; Pushes of vector registers.  There is no single instruction for these;
;; each pattern outputs "#" and is always split into an explicit stack
;; pointer decrement followed by a store to the new top of stack.
(define_insn_and_split "*pushti"
  [(set (match_operand:TI 0 "push_operand" "=<")
        (match_operand:TI 1 "nonmemory_operand" "x"))]
  "TARGET_SSE"
  "#"
  ""
  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
   (set (mem:TI (reg:SI 7)) (match_dup 1))]
  ""
  [(set_attr "type" "sse")])

(define_insn_and_split "*pushv4sf"
  [(set (match_operand:V4SF 0 "push_operand" "=<")
        (match_operand:V4SF 1 "nonmemory_operand" "x"))]
  "TARGET_SSE"
  "#"
  ""
  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
   (set (mem:V4SF (reg:SI 7)) (match_dup 1))]
  ""
  [(set_attr "type" "sse")])

(define_insn_and_split "*pushv4si"
  [(set (match_operand:V4SI 0 "push_operand" "=<")
        (match_operand:V4SI 1 "nonmemory_operand" "x"))]
  "TARGET_SSE"
  "#"
  ""
  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
   (set (mem:V4SI (reg:SI 7)) (match_dup 1))]
  ""
  [(set_attr "type" "sse")])

(define_insn_and_split "*pushv2si"
  [(set (match_operand:V2SI 0 "push_operand" "=<")
        (match_operand:V2SI 1 "nonmemory_operand" "y"))]
  "TARGET_MMX"
  "#"
  ""
  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
   (set (mem:V2SI (reg:SI 7)) (match_dup 1))]
  ""
  [(set_attr "type" "mmx")])

(define_insn_and_split "*pushv4hi"
  [(set (match_operand:V4HI 0 "push_operand" "=<")
        (match_operand:V4HI 1 "nonmemory_operand" "y"))]
  "TARGET_MMX"
  "#"
  ""
  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
   (set (mem:V4HI (reg:SI 7)) (match_dup 1))]
  ""
  [(set_attr "type" "mmx")])

(define_insn_and_split "*pushv8qi"
  [(set (match_operand:V8QI 0 "push_operand" "=<")
        (match_operand:V8QI 1 "nonmemory_operand" "y"))]
  "TARGET_MMX"
  "#"
  ""
  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
   (set (mem:V8QI (reg:SI 7)) (match_dup 1))]
  ""
  [(set_attr "type" "mmx")])

(define_insn_and_split "*pushv2sf"
  [(set (match_operand:V2SF 0 "push_operand" "=<")
        (match_operand:V2SF 1 "nonmemory_operand" "y"))]
  "TARGET_3DNOW"
  "#"
  ""
  [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
   (set (mem:V2SF (reg:SI 7)) (match_dup 1))]
  ""
  [(set_attr "type" "mmx")])

;; TImode move for 32-bit SSE targets.  The first alternative ("O" source)
;; clears the register with xorps.
(define_insn "movti_internal"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:TI 1 "general_operand" "O,xm,x"))]
  "TARGET_SSE && !TARGET_64BIT"
  "@
   xorps\t%0, %0
   movaps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; TImode move for 64-bit targets.  The first two alternatives output "#"
;; and are left to the splitter below; memory-to-memory moves are rejected.
(define_insn "*movti_rex64"
  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
        (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
  "TARGET_64BIT
   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
  "@
   #
   #
   xorps\t%0, %0
   movaps\\t{%1, %0|%0, %1}
   movaps\\t{%1, %0|%0, %1}"
  [(set_attr "type" "*,*,sse,sse,sse")
   (set_attr "mode" "TI")])

;; After reload, a TImode move that involves no SSE register is handed to
;; ix86_split_long_move.
(define_split
  [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (match_operand:TI 1 "general_operand" ""))]
  "reload_completed && !SSE_REG_P (operands[0])
   && !SSE_REG_P (operands[1])"
  [(const_int 0)]
  "ix86_split_long_move (operands); DONE;")

;; These two patterns are useful for specifying exactly whether to use
;; movaps or movups
(define_insn "sse_movaps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF
         [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))]
  "TARGET_SSE"
  "@
   movaps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "sse_movups"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF
         [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))]
  "TARGET_SSE"
  "@
   movups\t{%1, %0|%0, %1}
   movups\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])


;; SSE Strange Moves.

(define_insn "sse_movmskps"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))]
  "TARGET_SSE"
  "movmskps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "mmx_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "pmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; The destination address is constrained to class "D"; it is implicit in
;; the output template, which only names operands 1 and 2.
(define_insn "mmx_maskmovq"
  [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
                      (match_operand:V8QI 2 "register_operand" "y")] 32))]
  "TARGET_SSE || TARGET_3DNOW_A"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovq\t{%2, %1|%1, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_movntv4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=m")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))]
  "TARGET_SSE"
  "movntps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "sse_movntdi"
  [(set (match_operand:DI 0 "memory_operand" "=m")
        (unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "movntq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "register_operand" "0")
         (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
                          (parallel [(const_int 2)
                                     (const_int 3)
                                     (const_int 0)
                                     (const_int 1)]))
         (const_int 3)))]
  "TARGET_SSE"
  "movhlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "register_operand" "0")
         (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
                          (parallel [(const_int 2)
                                     (const_int 3)
                                     (const_int 0)
                                     (const_int 1)]))
         (const_int 12)))]
  "TARGET_SSE"
  "movlhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; movhps/movlps need a memory operand on one side, which the insn
;; condition enforces.
(define_insn "sse_movhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
         (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
         (const_int 12)))]
  "TARGET_SSE
   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
  "movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_movlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
         (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
         (const_int 3)))]
  "TARGET_SSE
   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
  "movlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_loadss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "memory_operand" "m")
         (vec_duplicate:V4SF (float:SF (const_int 0)))
         (const_int 1)))]
  "TARGET_SSE"
  "movss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "register_operand" "0")
         (match_operand:V4SF 2 "register_operand" "x")
         (const_int 1)))]
  "TARGET_SSE"
  "movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_storess"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (vec_select:SF
         (match_operand:V4SF 1 "register_operand" "x")
         (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "movss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "sse_shufps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                      (match_operand:SI 3 "immediate_operand" "i")] 41))]
  "TARGET_SSE"
  ;; @@@ check operand order for intel/nonintel syntax
  "shufps\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")])


;; SSE arithmetic

;; For each operation there is a packed pattern (…ps) and a "vm" pattern
;; (…ss) that merges element 0 of the result with operand 1.

(define_insn "addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "addps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmaddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "addss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "subps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "subss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "mulps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmmulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "mulss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "divps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmdivv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "divss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])


;; SSE square root/reciprocal

(define_insn "rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
         [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
  "TARGET_SSE"
  "rcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
         (match_operand:V4SF 2 "register_operand" "0")
         (const_int 1)))]
  "TARGET_SSE"
  "rcpss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
         [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
  "TARGET_SSE"
  "rsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
         (match_operand:V4SF 2 "register_operand" "0")
         (const_int 1)))]
  "TARGET_SSE"
  "rsqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "sqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "vmsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
         (match_operand:V4SF 2 "register_operand" "0")
         (const_int 1)))]
  "TARGET_SSE"
  "sqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; SSE logical operations.

;; These are not called andti3 etc. because we really really don't want
;; the compiler to widen DImode ands to TImode ands and then try to move
;; into DImode subregs of SSE registers, AND them together, and move out
;; of DImode subregs again!

(define_insn "*sse_andti3_df_1"
  [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
        (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
                (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
  "TARGET_SSE2"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_andti3_df_2"
  [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
        (and:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
                (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
  "TARGET_SSE2"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_andti3_sf_1"
  [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
        (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
                (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
  "TARGET_SSE"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_andti3_sf_2"
  [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
        (and:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_andti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (and:TI (match_operand:TI 1 "register_operand" "%0")
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && !TARGET_SSE2"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_andti3_sse2"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (and:TI (match_operand:TI 1 "register_operand" "%0")
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pand\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_nandti3_df"
  [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
        (and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0))
                (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_nandti3_sf"
  [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
        (and:TI (not:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0))
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_nandti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && !TARGET_SSE2"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; The SSE2 integer and-not instruction is spelled "pandn".
(define_insn "*sse_nandti3_sse2"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_iorti3_df_1"
  [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
        (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
                (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
  "TARGET_SSE2"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_iorti3_df_2"
  [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
        (ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
                (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
  "TARGET_SSE2"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_iorti3_sf_1"
  [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
        (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
                (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
  "TARGET_SSE"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_iorti3_sf_2"
  [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
        (ior:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_iorti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (ior:TI (match_operand:TI 1 "register_operand" "%0")
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && !TARGET_SSE2"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_iorti3_sse2"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (ior:TI (match_operand:TI 1 "register_operand" "%0")
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "por\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_xorti3_df_1"
  [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
        (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
                (subreg:TI (match_operand:DF 2 "register_operand" "Y") 0)))]
  "TARGET_SSE2"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_xorti3_df_2"
  [(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
        (xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0)
                (match_operand:TI 2 "nonimmediate_operand" "Ym")))]
  "TARGET_SSE2"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_xorti3_sf_1"
  [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
        (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "%0") 0)
                (subreg:TI (match_operand:SF 2 "register_operand" "x") 0)))]
  "TARGET_SSE"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_xorti3_sf_2"
  [(set (subreg:TI (match_operand:SF 0 "register_operand" "=x") 0)
        (xor:TI (subreg:TI (match_operand:SF 1 "register_operand" "0") 0)
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_xorti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (xor:TI (match_operand:TI 1 "register_operand" "%0")
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && !TARGET_SSE2"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "*sse_xorti3_sse2"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (xor:TI (match_operand:TI 1 "register_operand" "%0")
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pxor\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; Use xor, but don't show input operands so they aren't live before
;; this insn.
+(define_insn "sse_clrv4sf" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(const_int 0)] 45))] + "TARGET_SSE" + "xorps\t{%0, %0|%0, %0}" + [(set_attr "type" "sse") + (set_attr "memory" "none")]) + +;; SSE mask-generating compares + +(define_insn "maskcmpv4sf3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (match_operator:V4SI 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "x")]))] + "TARGET_SSE" + "cmp%D3ps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "maskncmpv4sf3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (not:V4SI + (match_operator:V4SI 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "x")])))] + "TARGET_SSE" +{ + if (GET_CODE (operands[3]) == UNORDERED) + return "cmpordps\t{%2, %0|%0, %2}"; + else + return "cmpn%D3ps\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sse")]) + +;; Scalar mask compare: the comparison result is merged with operand 1 +;; under mask 1. Operand 1 is V4SF while the vec_merge is V4SI, so it is +;; referenced through a V4SI subreg to keep the modes consistent, in the +;; same way as vmmaskncmpv4sf3. +(define_insn "vmmaskcmpv4sf3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (match_operator:V4SI 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "x")]) + (subreg:V4SI (match_dup 1) 0) + (const_int 1)))] + "TARGET_SSE" + "cmp%D3ss\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "vmmaskncmpv4sf3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (not:V4SI + (match_operator:V4SI 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "x")])) + (subreg:V4SI (match_dup 1) 0) + (const_int 1)))] + "TARGET_SSE" +{ + if (GET_CODE (operands[3]) == UNORDERED) + return "cmpordss\t{%2, %0|%0, %2}"; + else + return "cmpn%D3ss\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sse")]) + +(define_insn "sse_comi" + [(set (reg:CCFP 17) + (match_operator:CCFP 2 "sse_comparison_operator" + [(vec_select:SF + (match_operand:V4SF 
0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))]))] + "TARGET_SSE" + "comiss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "sse_ucomi" + [(set (reg:CCFPU 17) + (match_operator:CCFPU 2 "sse_comparison_operator" + [(vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))]))] + "TARGET_SSE" + "ucomiss\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + + +;; SSE unpack + +(define_insn "sse_unpckhps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (const_int 5)))] + "TARGET_SSE" + "unpckhps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_unpcklps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (const_int 5)))] + "TARGET_SSE" + "unpcklps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + + +;; SSE min/max + +(define_insn "smaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "maxps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "vmsmaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (smax:V4SF 
(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "maxss\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "minps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "vmsminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "minss\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + + +;; SSE <-> integer/MMX conversions + +(define_insn "cvtpi2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) + (const_int 12)))] + "TARGET_SSE" + "cvtpi2ps\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "cvtps2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SSE" + "cvtps2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "cvttps2pi" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_select:V2SI + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SSE" + "cvttps2pi\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "cvtsi2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 
14)))] + "TARGET_SSE" + "cvtsi2ss\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "cvtss2si" + [(set (match_operand:SI 0 "register_operand" "=r") + (vec_select:SI + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (parallel [(const_int 0)])))] + "TARGET_SSE" + "cvtss2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "cvttss2si" + [(set (match_operand:SI 0 "register_operand" "=r") + (vec_select:SI + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30) + (parallel [(const_int 0)])))] + "TARGET_SSE" + "cvttss2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + + +;; MMX insns + +;; MMX arithmetic + +(define_insn "addv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "addv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "addv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "ssaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddsb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "ssaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddsw\t{%2, %0|%0, %2}" + [(set_attr 
"type" "mmx")]) + +(define_insn "usaddv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddusb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "usaddv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "paddusw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "subv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "subv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (minus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "subv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (minus:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "sssubv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubsb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "sssubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubsw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "ussubv8qi3" + [(set (match_operand:V8QI 0 
"register_operand" "=y") + (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubusb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "ussubv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "psubusw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mulv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (mult:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pmullw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "smulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (sign_extend:V4SI + (match_operand:V4HI 1 "register_operand" "0")) + (sign_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 16))))] + "TARGET_MMX" + "pmulhw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "umulv4hi3_highpart" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (truncate:V4HI + (lshiftrt:V4SI + (mult:V4SI (zero_extend:V4SI + (match_operand:V4HI 1 "register_operand" "0")) + (zero_extend:V4SI + (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (const_int 16))))] + "TARGET_SSE || TARGET_3DNOW_A" + "pmulhuw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_pmaddwd" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (plus:V2SI + (mult:V2SI + (sign_extend:V2SI + (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0) (const_int 2)]))) + (sign_extend:V2SI + (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (parallel [(const_int 0) (const_int 2)])))) + (mult:V2SI + (sign_extend:V2SI (vec_select:V2HI (match_dup 1) + (parallel [(const_int 1) + 
(const_int 3)]))) + (sign_extend:V2SI (vec_select:V2HI (match_dup 2) + (parallel [(const_int 1) + (const_int 3)]))))))] + "TARGET_MMX" + "pmaddwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + + +;; MMX logical operations +;; Note we don't want to declare these as regular iordi3 insns to prevent +;; normal code that also wants to use the FPU from getting broken. +;; The UNSPECs are there to prevent the combiner from getting overly clever. +(define_insn "mmx_iordi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(ior:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + "TARGET_MMX" + "por\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_xordi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(xor:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + "TARGET_MMX" + "pxor\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) + +;; Same as pxor, but don't show input operands so that we don't think +;; they are live. 
+(define_insn "mmx_clrdi" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI [(const_int 0)] 45))] + "TARGET_MMX" + "pxor\t{%0, %0|%0, %0}" + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) + +(define_insn "mmx_anddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(and:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + "TARGET_MMX" + "pand\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_nanddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0")) + (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))] + "TARGET_MMX" + "pandn\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + + +;; MMX unsigned averages/sum of absolute differences + +(define_insn "mmx_uavgv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (ashiftrt:V8QI + (plus:V8QI (plus:V8QI + (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")) + (vec_const:V8QI (parallel [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)]))) + (const_int 1)))] + "TARGET_SSE || TARGET_3DNOW_A" + "pavgb\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "mmx_uavgv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ashiftrt:V4HI + (plus:V4HI (plus:V4HI + (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")) + (vec_const:V4HI (parallel [(const_int 1) + (const_int 1) + (const_int 1) + (const_int 1)]))) + (const_int 1)))] + "TARGET_SSE || TARGET_3DNOW_A" + "pavgw\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "mmx_psadbw" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))] + "TARGET_SSE || 
TARGET_3DNOW_A" + "psadbw\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + + +;; MMX insert/extract/shuffle + +(define_insn "mmx_pinsrw" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0") + (vec_duplicate:V4HI + (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (match_operand:SI 3 "immediate_operand" "i")))] + "TARGET_SSE || TARGET_3DNOW_A" + "pinsrw\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sse")]) + +(define_insn "mmx_pextrw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") + (parallel + [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SSE || TARGET_3DNOW_A" + "pextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sse")]) + +(define_insn "mmx_pshufw" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")] 41))] + "TARGET_SSE || TARGET_3DNOW_A" + "pshufw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sse")]) + + +;; MMX mask-generating comparisons + +(define_insn "eqv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (eq:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpeqb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "eqv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (eq:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpeqw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "eqv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (eq:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpeqd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "gtv8qi3" + [(set 
(match_operand:V8QI 0 "register_operand" "=y") + (gt:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpgtb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "gtv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (gt:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpgtw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "gtv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (gt:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] + "TARGET_MMX" + "pcmpgtd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + + +;; MMX max/min insns + +(define_insn "umaxv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (umax:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_SSE || TARGET_3DNOW_A" + "pmaxub\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "smaxv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smax:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_SSE || TARGET_3DNOW_A" + "pmaxsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "uminv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (umin:V8QI (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] + "TARGET_SSE || TARGET_3DNOW_A" + "pminub\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sminv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smin:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] + "TARGET_SSE || TARGET_3DNOW_A" + "pminsw\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + + +;; MMX shifts + +(define_insn "ashrv4hi3" + [(set 
(match_operand:V4HI 0 "register_operand" "=y") + (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psraw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "ashrv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psrad\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "lshrv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psrlw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "lshrv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psrld\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +;; See logical MMX insns. +(define_insn "mmx_lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))] + "TARGET_MMX" + "psrlq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "ashlv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "psllw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "ashlv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi")))] + "TARGET_MMX" + "pslld\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +;; See logical MMX insns. 
+(define_insn "mmx_ashldi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(ashift:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonmemory_operand" "yi"))] 45))] + "TARGET_MMX" + "psllq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + + +;; MMX pack/unpack insns. + +(define_insn "mmx_packsswb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) + (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] + "TARGET_MMX" + "packsswb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_packssdw" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_concat:V4HI + (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0")) + (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))] + "TARGET_MMX" + "packssdw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_packuswb" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_concat:V8QI + (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0")) + (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))] + "TARGET_MMX" + "packuswb\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_punpckhbw" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (const_int 85)))] + "TARGET_MMX" + "punpckhbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_punpckhwd" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_select:V4HI 
(match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (const_int 5)))] + "TARGET_MMX" + "punpckhwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_punpckhdq" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 0) + (const_int 1)])) + (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") + (parallel [(const_int 1) + (const_int 0)])) + (const_int 1)))] + "TARGET_MMX" + "punpckhdq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_punpcklbw" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (vec_merge:V8QI + (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y") + (parallel [(const_int 4) + (const_int 0) + (const_int 5) + (const_int 1) + (const_int 6) + (const_int 2) + (const_int 7) + (const_int 3)])) + (const_int 85)))] + "TARGET_MMX" + "punpcklbw\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_punpcklwd" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (vec_merge:V4HI + (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (const_int 5)))] + "TARGET_MMX" + "punpcklwd\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + +(define_insn "mmx_punpckldq" + [(set (match_operand:V2SI 0 "register_operand" "=y") + (vec_merge:V2SI + 
(vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") + (parallel [(const_int 1) + (const_int 0)])) + (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") + (parallel [(const_int 0) + (const_int 1)])) + (const_int 1)))] + "TARGET_MMX" + "punpckldq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmx")]) + + +;; Miscellaneous stuff + +(define_insn "emms" + [(unspec_volatile [(const_int 0)] 31) + (clobber (reg:XF 8)) + (clobber (reg:XF 9)) + (clobber (reg:XF 10)) + (clobber (reg:XF 11)) + (clobber (reg:XF 12)) + (clobber (reg:XF 13)) + (clobber (reg:XF 14)) + (clobber (reg:XF 15)) + (clobber (reg:DI 29)) + (clobber (reg:DI 30)) + (clobber (reg:DI 31)) + (clobber (reg:DI 32)) + (clobber (reg:DI 33)) + (clobber (reg:DI 34)) + (clobber (reg:DI 35)) + (clobber (reg:DI 36))] + "TARGET_MMX" + "emms" + [(set_attr "type" "mmx") + (set_attr "memory" "unknown")]) + +;; Load MXCSR from the SImode memory operand. MXCSR is SSE state, so the +;; insn is only valid when SSE is enabled, not with MMX alone. +(define_insn "ldmxcsr" + [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)] + "TARGET_SSE" + "ldmxcsr\t%0" + [(set_attr "type" "mmx") + (set_attr "memory" "load")]) + +;; Store MXCSR to the SImode memory operand. As with ldmxcsr, this +;; needs SSE rather than MMX. +(define_insn "stmxcsr" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec_volatile:SI [(const_int 0)] 40))] + "TARGET_SSE" + "stmxcsr\t%0" + [(set_attr "type" "mmx") + (set_attr "memory" "store")]) + +(define_expand "sfence" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] 44))] + "TARGET_SSE || TARGET_3DNOW_A" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*sfence_insn" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] 44))] + "TARGET_SSE || TARGET_3DNOW_A" + "sfence" + [(set_attr "type" "sse") + (set_attr "memory" "unknown")]) + +(define_expand "sse_prologue_save" + [(parallel [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] 13)) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 
"immediate_operand" "")) + (use (label_ref:DI (match_operand 3 "" "")))])] + "TARGET_64BIT" + "") + +(define_insn "*sse_prologue_save_insn" + [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") + (match_operand:DI 4 "const_int_operand" "n"))) + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] 13)) + (use (match_operand:DI 1 "register_operand" "r")) + (use (match_operand:DI 2 "const_int_operand" "i")) + (use (label_ref:DI (match_operand 3 "" "X")))] + "TARGET_64BIT + && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128 + && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" + "* +{ + int i; + operands[0] = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, operands[0], operands[4])); + output_asm_insn (\"jmp\\t%A1\", operands); + for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) + { + operands[4] = adjust_address (operands[0], DImode, i*16); + operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); + PUT_MODE (operands[4], TImode); + if (GET_CODE (XEXP (operands[0], 0)) != PLUS) + output_asm_insn (\"rex\", operands); + output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); + } + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", + CODE_LABEL_NUMBER (operands[3])); + RET; +} + " + [(set_attr "type" "other") + (set_attr "length_immediate" "0") + (set_attr "length_address" "0") + (set_attr "length" "135") + (set_attr "memory" "store") + (set_attr "modrm" "0") + (set_attr "mode" "DI")]) + +;; 3Dnow! 
instructions
+
+;; V2SF addition.  Operand 1 is tied to the destination ("0"); operand 2
+;; may be a register or memory ("ym").  Emits pfadd when TARGET_3DNOW.
+(define_insn "addv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (plus:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfadd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; V2SF subtraction, operand 1 minus operand 2.  Emits pfsub.
+(define_insn "subv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (minus:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                    (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfsub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Reversed V2SF subtraction: operand 2 minus operand 1.  Operand 1 is
+;; still the one tied to the destination.  Emits pfsubr.
+(define_insn "subrv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+                    (match_operand:V2SF 1 "register_operand" "0")))]
+  "TARGET_3DNOW"
+  "pfsubr\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; V2SF greater-than comparison; the result has mode V2SI.  Emits pfcmpgt.
+(define_insn "gtv2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
+                 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfcmpgt\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; V2SF greater-or-equal comparison; the result has mode V2SI.
+;; Emits pfcmpge.
+(define_insn "gev2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
+                 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfcmpge\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; V2SF equality comparison; the result has mode V2SI.  Emits pfcmpeq.
+(define_insn "eqv2sf3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (eq:V2SI (match_operand:V2SF 1 "register_operand" "0")
+                 (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfcmpeq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; V2SF signed maximum (smax).  Emits pfmax.
+(define_insn "pfmaxv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (smax:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfmax\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; V2SF signed minimum (smin).  Emits pfmin.
+(define_insn "pfminv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (smin:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfmin\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; V2SF multiplication.  Emits pfmul.
+(define_insn "mulv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (mult:V2SF (match_operand:V2SF 1 "register_operand" "0")
+                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pfmul\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; femms is modelled as unspec_volatile 46 together with clobbers of hard
+;; registers 8-15 (in XFmode) and 29-36 (in DImode).
+;; NOTE(review): presumably these are the x87 stack registers and the MMX
+;; registers -- confirm against the register numbering in i386.h.
+(define_insn "femms"
+  [(unspec_volatile [(const_int 0)] 46)
+   (clobber (reg:XF 8))
+   (clobber (reg:XF 9))
+   (clobber (reg:XF 10))
+   (clobber (reg:XF 11))
+   (clobber (reg:XF 12))
+   (clobber (reg:XF 13))
+   (clobber (reg:XF 14))
+   (clobber (reg:XF 15))
+   (clobber (reg:DI 29))
+   (clobber (reg:DI 30))
+   (clobber (reg:DI 31))
+   (clobber (reg:DI 32))
+   (clobber (reg:DI 33))
+   (clobber (reg:DI 34))
+   (clobber (reg:DI 35))
+   (clobber (reg:DI 36))]
+  "TARGET_3DNOW"
+  "femms"
+  [(set_attr "type" "mmx")])
+
+;; Float-to-integer conversion (fix) from V2SF to V2SI.  The source is not
+;; tied to the destination and may be a register or memory.  Emits pf2id.
+(define_insn "pf2id"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pf2id\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; V2SF is converted to V2SI (fix), truncated with signed saturation to
+;; V2HI, then sign-extended back to V2SI.  Emits pf2iw; guarded by
+;; TARGET_3DNOW_A rather than TARGET_3DNOW.
+(define_insn "pf2iw"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (sign_extend:V2SI
+          (ss_truncate:V2HI
+            (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
+  "TARGET_3DNOW_A"
+  "pf2iw\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; The result is the concatenation of (op1[0] + op1[1]) and
+;; (op2[0] + op2[1]).  Emits pfacc.
+;; NOTE(review): operand 2 has predicate nonimmediate_operand but
+;; constraint "y" only, whereas the other two-operand patterns in this
+;; group use "ym" -- confirm whether excluding memory is intentional.
+(define_insn "pfacc"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (vec_concat:V2SF
+          (plus:SF
+            (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+                           (parallel [(const_int 0)]))
+            (vec_select:SF (match_dup 1)
+                           (parallel [(const_int 1)])))
+          (plus:SF
+            (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
+                           (parallel [(const_int 0)]))
+            (vec_select:SF (match_dup 2)
+                           (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW"
+  "pfacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; The result is the concatenation of (op1[0] - op1[1]) and
+;; (op2[0] - op2[1]).  Emits pfnacc; guarded by TARGET_3DNOW_A.
+;; NOTE(review): operand 2 uses constraint "y" although its predicate is
+;; nonimmediate_operand -- confirm whether excluding memory is intentional.
+(define_insn "pfnacc"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (vec_concat:V2SF
+          (minus:SF
+            (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+                           (parallel [(const_int 0)]))
+            (vec_select:SF (match_dup 1)
+                           (parallel [(const_int 1)])))
+          (minus:SF
+            (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
+                           (parallel [(const_int 0)]))
+            (vec_select:SF (match_dup 2)
+                           (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW_A"
+  "pfnacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; The result is the concatenation of (op1[0] - op1[1]) and
+;; (op2[0] + op2[1]).  Emits pfpnacc; guarded by TARGET_3DNOW_A.
+;; NOTE(review): operand 2 uses constraint "y" although its predicate is
+;; nonimmediate_operand -- confirm whether excluding memory is intentional.
+(define_insn "pfpnacc"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (vec_concat:V2SF
+          (minus:SF
+            (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
+                           (parallel [(const_int 0)]))
+            (vec_select:SF (match_dup 1)
+                           (parallel [(const_int 1)])))
+          (plus:SF
+            (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
+                           (parallel [(const_int 0)]))
+            (vec_select:SF (match_dup 2)
+                           (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW_A"
+  "pfpnacc\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Each SImode element of operand 1 is truncated to HImode and
+;; sign-extended back to SImode; the resulting V2SI is converted to V2SF.
+;; Emits pi2fw; guarded by TARGET_3DNOW_A.
+(define_insn "pi2fw"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (float:V2SF
+          (vec_concat:V2SI
+            (sign_extend:SI
+              (truncate:HI
+                (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+                               (parallel [(const_int 0)]))))
+            (sign_extend:SI
+              (truncate:HI
+                (vec_select:SI (match_dup 1)
+                               (parallel [(const_int 1)])))))))]
+  "TARGET_3DNOW_A"
+  "pi2fw\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; Integer-to-float conversion from V2SI to V2SF.  Emits pi2fd.
+(define_insn "floatv2si2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+  "TARGET_3DNOW"
+  "pi2fd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; This insn is identical to pavgb in operation, but the opcode is
+;; different. To avoid accidentally matching pavgb, use an unspec.
+
+(define_insn "pavgusb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (unspec:V8QI
+          [(match_operand:V8QI 1 "register_operand" "0")
+           (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 49))]
+  "TARGET_3DNOW"
+  "pavgusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; 3DNow reciprocal and sqrt
+
+;; One-input pattern expressed as unspec 50 on V2SF.  Emits pfrcp.
+(define_insn "pfrcpv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 50))]
+  "TARGET_3DNOW"
+  "pfrcp\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; Two-input pattern expressed as unspec 51; operand 1 is tied to the
+;; destination.  Emits pfrcpit1.
+(define_insn "pfrcpit1v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+                      (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 51))]
+  "TARGET_3DNOW"
+  "pfrcpit1\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Two-input pattern expressed as unspec 52; operand 1 is tied to the
+;; destination.  Emits pfrcpit2.
+(define_insn "pfrcpit2v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+                      (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 52))]
+  "TARGET_3DNOW"
+  "pfrcpit2\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; One-input pattern expressed as unspec 53 on V2SF.  Emits pfrsqrt.
+(define_insn "pfrsqrtv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")] 53))]
+  "TARGET_3DNOW"
+  "pfrsqrt\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; Two-input pattern expressed as unspec 54; operand 1 is tied to the
+;; destination.  Emits pfrsqit1.
+(define_insn "pfrsqit1v2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+                      (match_operand:V2SF 2 "nonimmediate_operand" "ym")] 54))]
+  "TARGET_3DNOW"
+  "pfrsqit1\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Both V4HI operands are sign-extended to V4SI and multiplied; 32768 is
+;; added to every element, the sum is shifted right logically by 16, and
+;; the result is truncated back to V4HI.  Emits pmulhrw.
+(define_insn "pmulhrwv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (truncate:V4HI
+          (lshiftrt:V4SI
+            (plus:V4SI
+              (mult:V4SI
+                (sign_extend:V4SI
+                  (match_operand:V4HI 1 "register_operand" "0"))
+                (sign_extend:V4SI
+                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+              (vec_const:V4SI
+                (parallel [(const_int 32768)
+                           (const_int 32768)
+                           (const_int 32768)
+                           (const_int 32768)])))
+            (const_int 16))))]
+  "TARGET_3DNOW"
+  "pmulhrw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Selects elements 1 and 0 of the V2SI operand, in that order, i.e. swaps
+;; the two elements.  Emits pswapd; guarded by TARGET_3DNOW_A.
+(define_insn "pswapdv2si2"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+                         (parallel [(const_int 1) (const_int 0)])))]
+  "TARGET_3DNOW_A"
+  "pswapd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; Same element swap as pswapdv2si2, for V2SF operands.
+(define_insn "pswapdv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
+        (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
+                         (parallel [(const_int 1) (const_int 0)])))]
+  "TARGET_3DNOW_A"
+  "pswapd\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mmx")])
+
+;; Expander for prefetch.  Operand 0 is the address, operand 1 the rw flag
+;; (must be 0 or 1; nonzero requests a write prefetch) and operand 2 the
+;; locality (must be 0..3); anything else aborts.  The operands are then
+;; rewritten so that exactly one of the two insns below can match:
+;; locality is forced to 3 for *prefetch_3dnow, or rw is forced to 0 for
+;; *prefetch_sse.
+(define_expand "prefetch"
+  [(prefetch (match_operand:SI 0 "address_operand" "")
+             (match_operand:SI 1 "const_int_operand" "")
+             (match_operand:SI 2 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+{
+  int rw = INTVAL (operands[1]);
+  int locality = INTVAL (operands[2]);
+
+  if (rw != 0 && rw != 1)
+    abort ();
+  if (locality < 0 || locality > 3)
+    abort ();
+
+  /* Use 3dNOW prefetch in case we are asking for write prefetch not
+     supported by SSE counterpart or the SSE prefetch is not available
+     (K6 machines). Otherwise use SSE prefetch as it allows specifying
+     of locality. */
+  if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+    operands[2] = GEN_INT (3);
+  else
+    operands[1] = const0_rtx;
+})
+
+;; Matches only prefetches whose rw operand is (const_int 0).  The
+;; locality value (operand 1 here, 0..3) indexes the mnemonic table:
+;; 0 -> prefetchnta, 1 -> prefetcht2, 2 -> prefetcht1, 3 -> prefetcht0.
+(define_insn "*prefetch_sse"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+             (const_int 0)
+             (match_operand:SI 1 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE"
+{
+  static const char * const patterns[4] = {
+   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+  };
+
+  int locality = INTVAL (operands[1]);
+  if (locality < 0 || locality > 3)
+    abort ();
+
+  return patterns[locality];
+}
+  [(set_attr "type" "sse")])
+
+;; Matches only prefetches whose locality operand is (const_int 3).
+;; Emits prefetch when the rw operand is 0 and prefetchw otherwise.
+(define_insn "*prefetch_3dnow"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+             (match_operand:SI 1 "const_int_operand" "n")
+             (const_int 3))]
+  "TARGET_3DNOW"
+{
+  if (INTVAL (operands[1]) == 0)
+    return "prefetch\t%a0";
+  else
+    return "prefetchw\t%a0";
+}
+  [(set_attr "type" "mmx")]) |