diff options
Diffstat (limited to 'contrib/gcc/config/i386/i386.md')
-rw-r--r-- | contrib/gcc/config/i386/i386.md | 5228 |
1 files changed, 2999 insertions, 2229 deletions
diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md index e4377a5..a190d23 100644 --- a/contrib/gcc/config/i386/i386.md +++ b/contrib/gcc/config/i386/i386.md @@ -1,24 +1,24 @@ ;; GCC machine description for IA-32 and x86-64. ;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, -;; 2001, 2002, 2003 +;; 2001, 2002, 2003, 2004 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka ;; -;; This file is part of GNU CC. +;; This file is part of GCC. ;; -;; GNU CC is free software; you can redistribute it and/or modify +;; GCC is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; -;; GNU CC is distributed in the hope that it will be useful, +;; GCC is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License -;; along with GNU CC; see the file COPYING. If not, write to +;; along with GCC; see the file COPYING. If not, write to ;; the Free Software Foundation, 59 Temple Place - Suite 330, ;; Boston, MA 02111-1307, USA. */ ;; @@ -27,9 +27,6 @@ ;; ;; See file "rtl.def" for documentation on define_insn, match_*, et. al. ;; -;; Macro #define NOTICE_UPDATE_CC in file i386.h handles condition code -;; updates for most instructions. -;; ;; Macro REG_CLASS_FROM_LETTER in file i386.h defines the register ;; constraint letters. ;; @@ -66,7 +63,6 @@ (UNSPEC_INDNTPOFF 8) ; Prologue support - (UNSPEC_STACK_PROBE 10) (UNSPEC_STACK_ALLOC 11) (UNSPEC_SET_GOT 12) (UNSPEC_SSE_PROLOGUE_SAVE 13) @@ -80,7 +76,6 @@ (UNSPEC_SCAS 20) (UNSPEC_SIN 21) (UNSPEC_COS 22) - (UNSPEC_BSF 23) (UNSPEC_FNSTSW 24) (UNSPEC_SAHF 25) (UNSPEC_FSTCW 26) @@ -117,16 +112,28 @@ (UNSPEC_MOVSLDUP 75) (UNSPEC_LDQQU 76) (UNSPEC_MOVDDUP 77) + + ; x87 Floating point + (UNSPEC_FPATAN 65) + (UNSPEC_FYL2X 66) + (UNSPEC_FSCALE 67) + (UNSPEC_FRNDINT 68) + (UNSPEC_F2XM1 69) + + ; REP instruction + (UNSPEC_REP 75) ]) (define_constants [(UNSPECV_BLOCKAGE 0) + (UNSPECV_STACK_PROBE 10) (UNSPECV_EH_RETURN 13) (UNSPECV_EMMS 31) (UNSPECV_LDMXCSR 37) (UNSPECV_STMXCSR 40) (UNSPECV_FEMMS 46) (UNSPECV_CLFLUSH 57) + (UNSPECV_ALIGN 68) (UNSPECV_MONITOR 69) (UNSPECV_MWAIT 70) ]) @@ -142,8 +149,8 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in i386.h. -(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4" - (const (symbol_ref "ix86_cpu"))) +(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8" + (const (symbol_ref "ix86_tune"))) ;; A basic instruction type. Refinements due to arguments to be ;; provided in other attributes. @@ -152,17 +159,17 @@ alu,alu1,negnot,imov,imovx,lea, incdec,ishift,ishift1,rotate,rotate1,imul,idiv, icmp,test,ibr,setcc,icmov, - push,pop,call,callv, + push,pop,call,callv,leave, str,cld, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp, sselog,sseiadd,sseishft,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) ;; Main data type used by the insn (define_attr "mode" - "unknown,none,QI,HI,SI,DI,unknownfp,SF,DF,XF,TI,V4SF,V2DF,V2SF" + "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF" (const_string "unknown")) ;; The CPU unit operations uses. @@ -170,7 +177,7 @@ (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp") (const_string "i387") (eq_attr "type" "sselog,sseiadd,sseishft,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv") + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") (const_string "mmx") @@ -180,7 +187,7 @@ ;; The (bounding maximum) length of an instruction immediate. (define_attr "length_immediate" "" - (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv") + (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave") (const_int 0) (eq_attr "unit" "i387,sse,mmx") (const_int 0) @@ -234,17 +241,29 @@ ;; Set when 0f opcode prefix is used. (define_attr "prefix_0f" "" (if_then_else - (eq_attr "type" - "imovx,setcc,icmov, - sselog,sseiadd,sseishft,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecvt,ssediv, - mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") + (ior (eq_attr "type" "imovx,setcc,icmov") + (eq_attr "unit" "sse,mmx")) (const_int 1) (const_int 0))) +;; Set when 0f opcode prefix is used. +(define_attr "prefix_rex" "" + (cond [(and (eq_attr "mode" "DI") + (eq_attr "type" "!push,pop,call,callv,leave,ibr")) + (const_int 1) + (and (eq_attr "mode" "QI") + (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)") + (const_int 0))) + (const_int 1) + (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)") + (const_int 0)) + (const_int 1) + ] + (const_int 0))) + ;; Set when modrm byte is used. (define_attr "modrm" "" - (cond [(eq_attr "type" "str,cld") + (cond [(eq_attr "type" "str,cld,leave") (const_int 0) (eq_attr "unit" "i387") (const_int 0) @@ -285,7 +304,8 @@ (attr "length_address")))] (plus (plus (attr "modrm") (plus (attr "prefix_0f") - (const_int 1))) + (plus (attr "prefix_rex") + (const_int 1)))) (plus (attr "prefix_rep") (plus (attr "prefix_data16") (plus (attr "length_immediate") @@ -300,17 +320,21 @@ (const_string "unknown") (eq_attr "type" "lea,fcmov,fpspc,cld") (const_string "none") - (eq_attr "type" "fistp") + (eq_attr "type" "fistp,leave") (const_string "both") (eq_attr "type" "push") (if_then_else (match_operand 1 "memory_operand" "") (const_string "both") (const_string "store")) - (eq_attr "type" "pop,setcc") + (eq_attr "type" "pop") (if_then_else (match_operand 0 "memory_operand" "") (const_string "both") (const_string "load")) - (eq_attr "type" "icmp,test,ssecmp,mmxcmp,fcmp") + (eq_attr "type" "setcc") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "store") + (const_string "none")) + (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp") (if_then_else (ior (match_operand 0 "memory_operand" "") (match_operand 1 "memory_operand" "")) (const_string "load") @@ -327,7 +351,7 @@ (if_then_else (match_operand 1 "constant_call_address_operand" "") (const_string "none") (const_string "load")) - (and (eq_attr "type" "alu1,negnot") + (and (eq_attr "type" "alu1,negnot,ishift1") (match_operand 1 "memory_operand" "")) (const_string "both") (and (match_operand 0 "memory_operand" "") @@ -338,10 +362,10 @@ (match_operand 1 "memory_operand" "") (const_string "load") (and (eq_attr "type" - "!alu1,negnot, + "!alu1,negnot,ishift1, imov,imovx,icmp,test, fmov,fcmp,fsgn, - sse,ssemov,ssecmp,ssecvt, + sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt, mmx,mmxmov,mmxcmp,mmxcvt") (match_operand 2 "memory_operand" "")) (const_string "load") @@ -692,17 +716,6 @@ [(set (reg:CC 17) (compare:CC (match_operand:XF 0 "cmp_fp_expander_operand" "") (match_operand:XF 1 "cmp_fp_expander_operand" "")))] - "!TARGET_64BIT && TARGET_80387" -{ - ix86_compare_op0 = operands[0]; - ix86_compare_op1 = operands[1]; - DONE; -}) - -(define_expand "cmptf" - [(set (reg:CC 17) - (compare:CC (match_operand:TF 0 "cmp_fp_expander_operand" "") - (match_operand:TF 1 "cmp_fp_expander_operand" "")))] "TARGET_80387" { ix86_compare_op0 = operands[0]; @@ -766,7 +779,13 @@ return "ftst\;fnstsw\t%0"; } [(set_attr "type" "multi") - (set_attr "mode" "unknownfp")]) + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) ;; We may not use "#" to split and emit these, since the REG_DEAD notes ;; used to manage the reg stack popping would not be preserved. @@ -820,16 +839,6 @@ (compare:CCFP (match_operand:XF 0 "register_operand" "f") (match_operand:XF 1 "register_operand" "f")))] - "!TARGET_64BIT && TARGET_80387" - "* return output_fp_compare (insn, operands, 0, 0);" - [(set_attr "type" "fcmp") - (set_attr "mode" "XF")]) - -(define_insn "*cmpfp_2_tf" - [(set (reg:CCFP 18) - (compare:CCFP - (match_operand:TF 0 "register_operand" "f") - (match_operand:TF 1 "register_operand" "f")))] "TARGET_80387" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "fcmp") @@ -842,18 +851,6 @@ (match_operand:XF 1 "register_operand" "f") (match_operand:XF 2 "register_operand" "f"))] UNSPEC_FNSTSW))] - "!TARGET_64BIT && TARGET_80387" - "* return output_fp_compare (insn, operands, 2, 0);" - [(set_attr "type" "multi") - (set_attr "mode" "XF")]) - -(define_insn "*cmpfp_2_tf_1" - [(set (match_operand:HI 0 "register_operand" "=a") - (unspec:HI - [(compare:CCFP - (match_operand:TF 1 "register_operand" "f") - (match_operand:TF 2 "register_operand" "f"))] - UNSPEC_FNSTSW))] "TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "multi") @@ -869,7 +866,13 @@ && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 0, 1);" [(set_attr "type" "fcmp") - (set_attr "mode" "unknownfp")]) + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) (define_insn "*cmpfp_2u_1" [(set (match_operand:HI 0 "register_operand" "=a") @@ -883,7 +886,13 @@ && GET_MODE (operands[1]) == GET_MODE (operands[2])" "* return output_fp_compare (insn, operands, 2, 1);" [(set_attr "type" "multi") - (set_attr "mode" "unknownfp")]) + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) ;; Patterns to match the SImode-in-memory ficom instructions. ;; @@ -923,7 +932,7 @@ ;; FP compares, step 2 ;; Move the fpsw to ax. -(define_insn "x86_fnstsw_1" +(define_insn "*x86_fnstsw_1" [(set (match_operand:HI 0 "register_operand" "=a") (unspec:HI [(reg 18)] UNSPEC_FNSTSW))] "TARGET_80387" @@ -958,7 +967,13 @@ && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" [(set_attr "type" "fcmp") - (set_attr "mode" "unknownfp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_i_sse" @@ -969,8 +984,11 @@ && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "fcmp,ssecmp") - (set_attr "mode" "unknownfp") + [(set_attr "type" "fcmp,ssecomi") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_i_sse_only" @@ -980,8 +998,11 @@ "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[0])" "* return output_fp_compare (insn, operands, 1, 0);" - [(set_attr "type" "ssecmp") - (set_attr "mode" "unknownfp") + [(set_attr "type" "ssecomi") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_iu" @@ -994,7 +1015,13 @@ && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" [(set_attr "type" "fcmp") - (set_attr "mode" "unknownfp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_iu_sse" @@ -1005,8 +1032,11 @@ && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" - [(set_attr "type" "fcmp,ssecmp") - (set_attr "mode" "unknownfp") + [(set_attr "type" "fcmp,ssecomi") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) (set_attr "athlon_decode" "vector")]) (define_insn "*cmpfp_iu_sse_only" @@ -1016,8 +1046,11 @@ "SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && GET_MODE (operands[0]) == GET_MODE (operands[1])" "* return output_fp_compare (insn, operands, 1, 1);" - [(set_attr "type" "ssecmp") - (set_attr "mode" "unknownfp") + [(set_attr "type" "ssecomi") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) (set_attr "athlon_decode" "vector")]) ;; Move instructions. @@ -1095,13 +1128,13 @@ [(set_attr "type" "alu1") (set_attr "mode" "SI") (set_attr "length_immediate" "0")]) - + (define_insn "*movsi_or" [(set (match_operand:SI 0 "register_operand" "=r") (match_operand:SI 1 "immediate_operand" "i")) (clobber (reg:CC 17))] - "reload_completed && GET_CODE (operands[1]) == CONST_INT - && INTVAL (operands[1]) == -1 + "reload_completed + && operands[1] == constm1_rtx && (TARGET_PENTIUM || optimize_size)" { operands[1] = constm1_rtx; @@ -1112,9 +1145,49 @@ (set_attr "length_immediate" "1")]) (define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm") - (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))] - "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!rm,!*Y") + (match_operand:SI 1 "general_operand" "rinm,rin,*y,*y,rm,*Y,*Y,rm"))] + "(TARGET_INTER_UNIT_MOVES || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSEMOV: + if (get_attr_mode (insn) == MODE_TI) + return "movdqa\t{%1, %0|%0, %1}"; + return "movd\t{%1, %0|%0, %1}"; + + case TYPE_MMXMOV: + if (get_attr_mode (insn) == MODE_DI) + return "movq\t{%1, %0|%0, %1}"; + return "movd\t{%1, %0|%0, %1}"; + + case TYPE_LEA: + return "lea{l}\t{%1, %0|%0, %1}"; + + default: + if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) + abort(); + return "mov{l}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2,3,4") + (const_string "mmxmov") + (eq_attr "alternative" "5,6,7") + (const_string "ssemov") + (and (ne (symbol_ref "flag_pic") (const_int 0)) + (match_operand:SI 1 "symbolic_operand" "")) + (const_string "lea") + ] + (const_string "imov"))) + (set_attr "mode" "SI,SI,DI,SI,SI,TI,SI,SI")]) + +(define_insn "*movsi_1_nointernunit" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!m,!*y,!*Y,!m,!*Y") + (match_operand:SI 1 "general_operand" "rinm,rin,*y,*y,m,*Y,*Y,m"))] + "(!TARGET_INTER_UNIT_MOVES && !optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (get_attr_type (insn)) { @@ -1147,9 +1220,9 @@ (const_string "lea") ] (const_string "imov"))) - (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")]) + (set_attr "mode" "SI,SI,DI,SI,SI,TI,SI,SI")]) -;; Stores and loads of ax to arbitary constant address. +;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabssi_1_rex64" @@ -1268,7 +1341,7 @@ ] (const_string "HI")))]) -;; Stores and loads of ax to arbitary constant address. +;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabshi_1_rex64" @@ -1586,7 +1659,7 @@ (const_string "SI") (const_string "QI")))]) -;; Stores and loads of ax to arbitary constant address. +;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsqi_1_rex64" @@ -1690,11 +1763,11 @@ [(set_attr "type" "imov") (set_attr "mode" "QI")]) -(define_insn "*movsi_insv_1_rex64" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") +(define_insn "movdi_insv_1_rex64" + [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q") (const_int 8) (const_int 8)) - (match_operand:SI 1 "nonmemory_operand" "Qn"))] + (match_operand:DI 1 "nonmemory_operand" "Qn"))] "TARGET_64BIT" "mov{b}\t{%b1, %h0|%h0, %b1}" [(set_attr "type" "imov") @@ -1704,9 +1777,8 @@ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") (const_int 8) (const_int 8)) - (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") - (const_int 8)) - (const_int 255)))] + (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") + (const_int 8)))] "" "mov{b}\t{%h1, %h0|%h0, %h1}" [(set_attr "type" "imov") @@ -1824,8 +1896,7 @@ (clobber (reg:CC 17))] "TARGET_64BIT && (TARGET_PENTIUM || optimize_size) && reload_completed - && GET_CODE (operands[1]) == CONST_INT - && INTVAL (operands[1]) == -1" + && operands[1] == constm1_rtx" { operands[1] = constm1_rtx; return "or{q}\t{%1, %0|%0, %1}"; @@ -1869,19 +1940,23 @@ "ix86_split_long_move (operands); DONE;") (define_insn "*movdi_1_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!m*y,!*y,!*Y,!m,!*Y") - (match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,m,*Y,*Y,*m"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!rm,!*y,!*Y,!rm,!*Y") + (match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,rm,*Y,*Y,rm"))] "TARGET_64BIT + && (TARGET_INTER_UNIT_MOVES || optimize_size) && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (get_attr_type (insn)) { case TYPE_SSEMOV: - if (register_operand (operands[0], DImode) - && register_operand (operands[1], DImode)) + if (get_attr_mode (insn) == MODE_TI) return "movdqa\t{%1, %0|%0, %1}"; /* FALLTHRU */ case TYPE_MMXMOV: + /* Moves from and into integer register is done using movd opcode with + REX prefix. */ + if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])) + return "movd\t{%1, %0|%0, %1}"; return "movq\t{%1, %0|%0, %1}"; case TYPE_MULTI: return "#"; @@ -1899,9 +1974,9 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "5,6") + (cond [(eq_attr "alternative" "5,6,7") (const_string "mmxmov") - (eq_attr "alternative" "7,8,9") + (eq_attr "alternative" "8,9,10") (const_string "ssemov") (eq_attr "alternative" "4") (const_string "multi") @@ -1910,11 +1985,57 @@ (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*") - (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*") - (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI")]) + (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*") + (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI")]) -;; Stores and loads of ax to arbitary constant address. +(define_insn "*movdi_1_rex64_nointerunit" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!m,!*y,!*Y,!m,!*Y") + (match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,m,*Y,*Y,m"))] + "TARGET_64BIT + && (!TARGET_INTER_UNIT_MOVES && !optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSEMOV: + if (get_attr_mode (insn) == MODE_TI) + return "movdqa\t{%1, %0|%0, %1}"; + /* FALLTHRU */ + case TYPE_MMXMOV: + return "movq\t{%1, %0|%0, %1}"; + case TYPE_MULTI: + return "#"; + case TYPE_LEA: + return "lea{q}\t{%a1, %0|%0, %a1}"; + default: + if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) + abort (); + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else if (which_alternative == 2) + return "movabs{q}\t{%1, %0|%0, %1}"; + else + return "mov{q}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "5,6,7") + (const_string "mmxmov") + (eq_attr "alternative" "8,9,10") + (const_string "ssemov") + (eq_attr "alternative" "4") + (const_string "multi") + (and (ne (symbol_ref "flag_pic") (const_int 0)) + (match_operand:DI 1 "symbolic_operand" "")) + (const_string "lea") + ] + (const_string "imov"))) + (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*") + (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI")]) + +;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsdi_1_rex64" @@ -2007,22 +2128,11 @@ { switch (which_alternative) { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (4); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - case 1: return "push{l}\t%1"; - case 2: - return "#"; default: + /* This insn should be already split before reg-stack. */ abort (); } } @@ -2036,23 +2146,11 @@ { switch (which_alternative) { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (SFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (8); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - case 1: return "push{q}\t%q1"; - case 2: - return "#"; - default: + /* This insn should be already split before reg-stack. */ abort (); } } @@ -2089,7 +2187,8 @@ (define_insn "*movsf_1" [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y") (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + "(TARGET_INTER_UNIT_MOVES || optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE @@ -2113,25 +2212,117 @@ return "fst%z0\t%y0"; case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "mov{l}\t{%1, %0|%0, %1}"; + case 5: + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t%0, %0"; + else + return "xorps\t%0, %0"; + case 6: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movss\t{%1, %0|%0, %1}"; + case 7: + case 8: + return "movss\t{%1, %0|%0, %1}"; + + case 9: + case 10: + return "movd\t{%1, %0|%0, %1}"; + + case 11: + return "movq\t{%1, %0|%0, %1}"; + + default: abort(); + } +} + [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. */ + (eq_attr "alternative" "6") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0))) + (const_string "V4SF") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) + +(define_insn "*movsf_1_nointerunit" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!m,!*y") + (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,m,*y,*y"))] + "(!TARGET_INTER_UNIT_MOVES && !optimize_size) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], SFmode))" +{ + switch (which_alternative) + { + case 0: + if (REG_P (operands[1]) + && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } + else if (STACK_TOP_P (operands[0])) + return "fld%z1\t%y1"; + else + return "fst\t%y0"; + + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + + case 2: + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "mov{l}\t{%1, %0|%0, %1}"; case 5: - if (TARGET_SSE2 && !TARGET_ATHLON) + if (get_attr_mode (insn) == MODE_TI) return "pxor\t%0, %0"; else return "xorps\t%0, %0"; case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) + if (get_attr_mode (insn) == MODE_V4SF) return "movaps\t{%1, %0|%0, %1}"; else return "movss\t{%1, %0|%0, %1}"; @@ -2151,7 +2342,40 @@ } } [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") - (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. */ + (eq_attr "alternative" "6") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0))) + (const_string "V4SF") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) (define_insn "*swapsf" [(set (match_operand:SF 0 "register_operand" "+f") @@ -2175,7 +2399,7 @@ "ix86_expand_move (DFmode, operands); DONE;") ;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. -;; Size of pushdf using integer insturctions is 2+2*memory operand size +;; Size of pushdf using integer instructions is 2+2*memory operand size ;; On the average, pushdf using integers can be still shorter. Allow this ;; pattern for optimize_size too. @@ -2184,26 +2408,8 @@ (match_operand:DF 1 "general_no_elim_operand" "f#Y,Fo#fY,*r#fY,Y#f"))] "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (8); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - case 2: - case 3: - return "#"; - - default: - abort (); - } + /* This insn should be already split before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "DF,SI,SI,DF")]) @@ -2213,32 +2419,8 @@ (match_operand:DF 1 "general_no_elim_operand" "f#rY,rFo#fY,Y#rf"))] "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (8); - if (TARGET_64BIT) - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - else - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - - case 1: - case 2: - return "#"; - - default: - abort (); - } + /* This insn should be already split before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "DF,SI,DF")]) @@ -2275,7 +2457,7 @@ [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m") (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,C,Y#f,YHm#f,Y#f"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES) + && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE @@ -2286,7 +2468,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2299,44 +2486,90 @@ return "fst%z0\t%y0"; case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - abort(); + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "#"; case 5: - if (TARGET_ATHLON) - return "xorpd\t%0, %0"; - else - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + abort (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + default: + abort (); + } + case 7: + if (get_attr_mode (insn) == MODE_V2DF) + return "movlpd\t{%1, %0|%0, %1}"; else return "movsd\t{%1, %0|%0, %1}"; - case 7: case 8: - return "movsd\t{%1, %0|%0, %1}"; + return "movsd\t{%1, %0|%0, %1}"; default: abort(); } } [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4") + (const_string "SI") + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI")] + (const_string "V2DF")) + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF")] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0)) + (const_string "V2DF") + (const_string "DF"))] + (const_string "DF")))]) (define_insn "*movdf_integer" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m") (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,C,Y#rf,Ym#rf,Y#rf"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && !optimize_size && TARGET_INTEGER_DFMODE_MOVES + && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE @@ -2347,7 +2580,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; + } else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2360,30 +2598,41 @@ return "fst%z0\t%y0"; case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - abort(); + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "#"; case 5: - if (TARGET_ATHLON) - return "xorpd\t%0, %0"; - else - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + abort (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + default: + abort (); + } + case 7: + if (get_attr_mode (insn) == MODE_V2DF) + return "movlpd\t{%1, %0|%0, %1}"; else return "movsd\t{%1, %0|%0, %1}"; - case 7: case 8: return "movsd\t{%1, %0|%0, %1}"; @@ -2392,7 +2641,42 @@ } } [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4") + (const_string "SI") + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI")] + (const_string "V2DF")) + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF")] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0)) + (const_string "V2DF") + (const_string "DF"))] + (const_string "DF")))]) (define_split [(set (match_operand:DF 0 "nonimmediate_operand" "") @@ -2426,17 +2710,11 @@ (define_expand "movxf" [(set (match_operand:XF 0 "nonimmediate_operand" "") (match_operand:XF 1 "general_operand" ""))] - "!TARGET_64BIT" - "ix86_expand_move (XFmode, operands); DONE;") - -(define_expand "movtf" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (match_operand:TF 1 "general_operand" ""))] "" - "ix86_expand_move (TFmode, operands); DONE;") + "ix86_expand_move (XFmode, operands); DONE;") ;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. -;; Size of pushdf using integer insturctions is 3+3*memory operand size +;; Size of pushdf using integer instructions is 3+3*memory operand size ;; Pushing using integer instructions is longer except for constants ;; and direct memory references. ;; (assuming that any given constant is pushed only once, but this ought to be @@ -2445,55 +2723,10 @@ (define_insn "*pushxf_nointeger" [(set (match_operand:XF 0 "push_operand" "=X,X,X") (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] - "!TARGET_64BIT && optimize_size" -{ - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (12); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - case 2: - return "#"; - - default: - abort (); - } -} - [(set_attr "type" "multi") - (set_attr "mode" "XF,SI,SI")]) - -(define_insn "*pushtf_nointeger" - [(set (match_operand:TF 0 "push_operand" "=<,<,<") - (match_operand:TF 1 "general_no_elim_operand" "f,Fo,*r"))] "optimize_size" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (16); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - case 2: - return "#"; - - default: - abort (); - } + /* This insn should be already split before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "XF,SI,SI")]) @@ -2501,59 +2734,10 @@ (define_insn "*pushxf_integer" [(set (match_operand:XF 0 "push_operand" "=<,<") (match_operand:XF 1 "general_no_elim_operand" "f#r,ro#f"))] - "!TARGET_64BIT && !optimize_size" -{ - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (12); - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - return "#"; - - default: - abort (); - } -} - [(set_attr "type" "multi") - (set_attr "mode" "XF,SI")]) - -(define_insn "*pushtf_integer" - [(set (match_operand:TF 0 "push_operand" "=<,<") - (match_operand:TF 1 "general_no_elim_operand" "f#r,rFo#f"))] "!optimize_size" { - switch (which_alternative) - { - case 0: - /* %%% We loose REG_DEAD notes for controling pops if we split late. */ - operands[0] = gen_rtx_MEM (XFmode, stack_pointer_rtx); - operands[2] = stack_pointer_rtx; - operands[3] = GEN_INT (16); - if (TARGET_64BIT) - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{q}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{q}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - else - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "sub{l}\t{%3, %2|%2, %3}\;fstp%z0\t%y0"; - else - return "sub{l}\t{%3, %2|%2, %3}\;fst%z0\t%y0"; - - case 1: - return "#"; - - default: - abort (); - } + /* This insn should be already split before reg-stack. */ + abort (); } [(set_attr "type" "multi") (set_attr "mode" "XF,SI")]) @@ -2563,7 +2747,6 @@ (match_operand 1 "general_operand" ""))] "reload_completed && (GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == DFmode) && !ANY_FP_REG_P (operands[1])" [(const_int 0)] @@ -2573,29 +2756,23 @@ [(set (match_operand:XF 0 "push_operand" "") (match_operand:XF 1 "any_fp_register_operand" ""))] "!TARGET_64BIT" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) - (set (mem:XF (reg:SI 7)) (match_dup 1))]) - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "any_fp_register_operand" ""))] - "!TARGET_64BIT" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:TF (reg:SI 7)) (match_dup 1))]) + [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 2))) + (set (mem:XF (reg:SI 7)) (match_dup 1))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") (define_split - [(set (match_operand:TF 0 "push_operand" "") - (match_operand:TF 1 "any_fp_register_operand" ""))] + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "any_fp_register_operand" ""))] "TARGET_64BIT" - [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) - (set (mem:TF (reg:DI 7)) (match_dup 1))]) + [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 2))) + (set (mem:XF (reg:DI 7)) (match_dup 1))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") ;; Do not use integer registers when optimizing for size (define_insn "*movxf_nointeger" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] - "!TARGET_64BIT - && optimize_size + "optimize_size && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed || GET_CODE (operands[1]) != CONST_DOUBLE @@ -2606,54 +2783,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; - else - return "fstp%z0\t%y0"; - - case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; } - break; - - case 3: case 4: - return "#"; - } - abort(); -} - [(set_attr "type" "fmov,fmov,fmov,multi,multi") - (set_attr "mode" "XF,XF,XF,SI,SI")]) - -(define_insn "*movtf_nointeger" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f,m,f,*r,o") - (match_operand:TF 1 "general_operand" "fm,f,G,*roF,F*r"))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && optimize_size - && (reload_in_progress || reload_completed - || GET_CODE (operands[1]) != CONST_DOUBLE - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || memory_operand (operands[0], TFmode))" -{ - switch (which_alternative) - { - case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2668,14 +2803,7 @@ return "fstp%z0\t%y0"; case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - break; + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "#"; @@ -2688,8 +2816,7 @@ (define_insn "*movxf_integer" [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o") (match_operand:XF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))] - "!TARGET_64BIT - && !optimize_size + "!optimize_size && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed || GET_CODE (operands[1]) != CONST_DOUBLE @@ -2700,54 +2827,12 @@ case 0: if (REG_P (operands[1]) && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\;fld%z0\t%y0"; - else - return "fstp%z0\t%y0"; - - case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; + { + if (REGNO (operands[0]) == FIRST_STACK_REG + && TARGET_USE_FFREEP) + return "ffreep\t%y0"; + return "fstp\t%y0"; } - break; - - case 3: case 4: - return "#"; - } - abort(); -} - [(set_attr "type" "fmov,fmov,fmov,multi,multi") - (set_attr "mode" "XF,XF,XF,SI,SI")]) - -(define_insn "*movtf_integer" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o") - (match_operand:TF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))] - "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && !optimize_size - && (reload_in_progress || reload_completed - || GET_CODE (operands[1]) != CONST_DOUBLE - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) - || memory_operand (operands[0], TFmode))" -{ - switch (which_alternative) - { - case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; else if (STACK_TOP_P (operands[0])) return "fld%z1\t%y1"; else @@ -2762,14 +2847,7 @@ return "fstp%z0\t%y0"; case 2: - switch (standard_80387_constant_p (operands[1])) - { - case 1: - return "fldz"; - case 2: - return "fld1"; - } - break; + return standard_80387_constant_opcode (operands[1]); case 3: case 4: return "#"; @@ -2784,7 +2862,7 @@ (match_operand 1 "general_operand" ""))] "reload_completed && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) - && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == TFmode) + && GET_MODE (operands[0]) == XFmode && ! (ANY_FP_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG && ANY_FP_REG_P (SUBREG_REG (operands[0])))) @@ -2799,21 +2877,33 @@ (match_operand 1 "memory_operand" ""))] "reload_completed && GET_CODE (operands[1]) == MEM - && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == TFmode + && (GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode) && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)) - && (!(SSE_REG_P (operands[0]) || - (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (SUBREG_REG (operands[0])))) - || standard_sse_constant_p (get_pool_constant (XEXP (operands[1], 0)))) - && (!(FP_REG_P (operands[0]) || - (GET_CODE (operands[0]) == SUBREG - && FP_REG_P (SUBREG_REG (operands[0])))) - || standard_80387_constant_p (get_pool_constant (XEXP (operands[1], 0))))" - [(set (match_dup 0) - (match_dup 1))] - "operands[1] = get_pool_constant (XEXP (operands[1], 0));") + && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))" + [(set (match_dup 0) (match_dup 1))] +{ + rtx c = get_pool_constant (XEXP (operands[1], 0)); + rtx r = operands[0]; + + if (GET_CODE (r) == SUBREG) + r = SUBREG_REG (r); + + if (SSE_REG_P (r)) + { + if (!standard_sse_constant_p (c)) + FAIL; + } + else if (FP_REG_P (r)) + { + if (!standard_80387_constant_p (c)) + FAIL; + } + else if (MMX_REG_P (r)) + FAIL; + + operands[1] = c; +}) (define_insn "swapxf" [(set (match_operand:XF 0 "register_operand" "+f") @@ -2829,21 +2919,6 @@ } [(set_attr "type" "fxch") (set_attr "mode" "XF")]) - -(define_insn "swaptf" - [(set (match_operand:TF 0 "register_operand" "+f") - (match_operand:TF 1 "register_operand" "+f")) - (set (match_dup 1) - (match_dup 0))] - "" -{ - if (STACK_TOP_P (operands[0])) - return "fxch\t%1"; - else - return "fxch\t%0"; -} - [(set_attr "type" "fxch") - (set_attr "mode" "XF")]) ;; Zero extension instructions @@ -3040,22 +3115,56 @@ ") (define_insn "zero_extendsidi2_32" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r"))) + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,!?y,!?Y") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,m,m"))) (clobber (reg:CC 17))] - "!TARGET_64BIT" - "#" - [(set_attr "mode" "SI")]) + "!TARGET_64BIT && !TARGET_INTER_UNIT_MOVES" + "@ + # + # + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "mode" "SI,SI,SI,DI,TI") + (set_attr "type" "multi,multi,multi,mmxmov,ssemov")]) + +(define_insn "*zero_extendsidi2_32_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,!?y,!?Y") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,rm,rm"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES" + "@ + # + # + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "mode" "SI,SI,SI,DI,TI") + (set_attr "type" "multi,multi,multi,mmxmov,ssemov")]) (define_insn "zero_extendsidi2_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0")))] - "TARGET_64BIT" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!?y,!?Y") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0,m,m")))] + "TARGET_64BIT && !TARGET_INTER_UNIT_MOVES" "@ mov\t{%k1, %k0|%k0, %k1} - #" - [(set_attr "type" "imovx,imov") - (set_attr "mode" "SI,DI")]) + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx,imov,mmxmov,ssemov") + (set_attr "mode" "SI,DI,DI,TI")]) + +(define_insn "*zero_extendsidi2_rex64_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!?y,!*?") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0,rm,rm")))] + "TARGET_64BIT && TARGET_INTER_UNIT_MOVES" + "@ + mov\t{%k1, %k0|%k0, %k1} + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx,imov,mmxmov,ssemov") + (set_attr "mode" "SI,DI,SI,SI")]) (define_split [(set (match_operand:DI 0 "memory_operand" "") @@ -3077,7 +3186,8 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "") (zero_extend:DI (match_operand:SI 1 "general_operand" ""))) (clobber (reg:CC 17))] - "!TARGET_64BIT && reload_completed" + "!TARGET_64BIT && reload_completed + && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])" [(set (match_dup 3) (match_dup 1)) (set (match_dup 4) (const_int 0))] "split_di (&operands[0], 1, &operands[3], &operands[4]);") @@ -3359,62 +3469,34 @@ (define_split [(set (match_operand:XF 0 "push_operand" "") (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))] - "!TARGET_64BIT" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) - (set (mem:XF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) - -(define_insn "*dummy_extendsftf2" - [(set (match_operand:TF 0 "push_operand" "=<") - (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "f")))] - "0" - "#") - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:SF 1 "fp_register_operand" "")))] - "!TARGET_64BIT" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:TF (reg:SI 7)) (float_extend:TF (match_dup 1)))]) + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 2))) + (set (mem:XF (reg:SI 7)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") (define_split - [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:SF 1 "fp_register_operand" "")))] + [(set (match_operand:XF 0 "push_operand" "") + (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))] "TARGET_64BIT" - [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) - (set (mem:DF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) - -(define_insn "*dummy_extenddfxf2" - [(set (match_operand:XF 0 "push_operand" "=<") - (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "f")))] - "0" - "#") + [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 2))) + (set (mem:DF (reg:DI 7)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") (define_split [(set (match_operand:XF 0 "push_operand" "") (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))] - "!TARGET_64BIT" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) - (set (mem:DF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) - -(define_insn "*dummy_extenddftf2" - [(set (match_operand:TF 0 "push_operand" "=<") - (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "f")))] - "0" - "#") - -(define_split - [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:DF 1 "fp_register_operand" "")))] - "!TARGET_64BIT" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:TF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 2))) + (set (mem:DF (reg:SI 7)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") (define_split - [(set (match_operand:TF 0 "push_operand" "") - (float_extend:TF (match_operand:DF 1 "fp_register_operand" "")))] + [(set (match_operand:XF 0 "push_operand" "") + (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))] "TARGET_64BIT" - [(set (reg:DI 7) (plus:DI (reg:DI 7) (const_int -16))) - (set (mem:TF (reg:DI 7)) (float_extend:TF (match_dup 1)))]) + [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 2))) + (set (mem:XF (reg:DI 7)) (float_extend:XF (match_dup 1)))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);") (define_expand "extendsfdf2" [(set (match_operand:DF 0 "nonimmediate_operand" "") @@ -3474,7 +3556,7 @@ (define_expand "extendsfxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") (float_extend:XF (match_operand:SF 1 "general_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "TARGET_80387" { /* ??? Needed for compress_float_constant since all fp constants are LEGITIMATE_CONSTANT_P. */ @@ -3487,51 +3569,6 @@ (define_insn "*extendsfxf2_1" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "!TARGET_64BIT && TARGET_80387 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - else - return "fstp%z0\t%y0"; - - default: - abort (); - } -} - [(set_attr "type" "fmov") - (set_attr "mode" "SF,XF")]) - -(define_expand "extendsftf2" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (float_extend:TF (match_operand:SF 1 "general_operand" "")))] - "TARGET_80387" -{ - /* ??? Needed for compress_float_constant since all fp constants - are LEGITIMATE_CONSTANT_P. */ - if (GET_CODE (operands[1]) == CONST_DOUBLE) - operands[1] = validize_mem (force_const_mem (SFmode, operands[1])); - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (SFmode, operands[1]); -}) - -(define_insn "*extendsftf2_1" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f,m") - (float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] "TARGET_80387 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { @@ -3564,7 +3601,7 @@ (define_expand "extenddfxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") (float_extend:XF (match_operand:DF 1 "general_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "TARGET_80387" { /* ??? Needed for compress_float_constant since all fp constants are LEGITIMATE_CONSTANT_P. */ @@ -3577,51 +3614,6 @@ (define_insn "*extenddfxf2_1" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] - "!TARGET_64BIT && TARGET_80387 - && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" -{ - switch (which_alternative) - { - case 0: - if (REG_P (operands[1]) - && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp\t%y0"; - else if (STACK_TOP_P (operands[0])) - return "fld%z1\t%y1"; - else - return "fst\t%y0"; - - case 1: - /* There is no non-popping store to memory for XFmode. So if - we need one, follow the store with a load. */ - if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0\n\tfld%z0\t%y0"; - else - return "fstp%z0\t%y0"; - - default: - abort (); - } -} - [(set_attr "type" "fmov") - (set_attr "mode" "DF,XF")]) - -(define_expand "extenddftf2" - [(set (match_operand:TF 0 "nonimmediate_operand" "") - (float_extend:TF (match_operand:DF 1 "general_operand" "")))] - "TARGET_80387" -{ - /* ??? Needed for compress_float_constant since all fp constants - are LEGITIMATE_CONSTANT_P. */ - if (GET_CODE (operands[1]) == CONST_DOUBLE) - operands[1] = validize_mem (force_const_mem (DFmode, operands[1])); - if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) - operands[1] = force_reg (DFmode, operands[1]); -}) - -(define_insn "*extenddftf2_1" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f,m") - (float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] "TARGET_80387 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { @@ -3695,11 +3687,11 @@ (set_attr "mode" "SF,SF,SF,SF")]) (define_insn "*truncdfsf2_1_sse" - [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,Y") + [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m#fxr,?f#xr,?r#fx,?x#fr,Y#fr") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY"))) + (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] - "TARGET_80387 && TARGET_SSE2" + "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" { switch (which_alternative) { @@ -3709,7 +3701,30 @@ else return "fst%z0\t%y0"; case 4: - return "cvtsd2ss\t{%1, %0|%0, %1}"; + return "#"; + default: + abort (); + } +} + [(set_attr "type" "fmov,multi,multi,multi,ssecvt") + (set_attr "mode" "SF,SF,SF,SF,DF")]) + +(define_insn "*truncdfsf2_1_sse_nooverlap" + [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f#Y,f#Y,f#Y,f#Y,mY#f"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" +{ + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + case 4: + return "#"; default: abort (); } @@ -3718,16 +3733,41 @@ (set_attr "mode" "SF,SF,SF,SF,DF")]) (define_insn "*truncdfsf2_2" - [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m") + [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,Y,!m") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] - "TARGET_80387 && TARGET_SSE2 + (match_operand:DF 1 "nonimmediate_operand" "Y,mY,f#Y")))] + "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (which_alternative) { case 0: + case 1: return "cvtsd2ss\t{%1, %0|%0, %1}"; + case 2: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort (); + } +} + [(set_attr "type" "ssecvt,ssecvt,fmov") + (set_attr "athlon_decode" "vector,double,*") + (set_attr "mode" "SF,SF,SF")]) + +(define_insn "*truncdfsf2_2_nooverlap" + [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + return "#"; case 1: if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) return "fstp%z0\t%y0"; @@ -3740,7 +3780,7 @@ [(set_attr "type" "ssecvt,fmov") (set_attr "mode" "DF,SF")]) -(define_insn "truncdfsf2_3" +(define_insn "*truncdfsf2_3" [(set (match_operand:SF 0 "memory_operand" "=m") (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] @@ -3755,12 +3795,22 @@ (set_attr "mode" "SF")]) (define_insn "truncdfsf2_sse_only" - [(set (match_operand:SF 0 "register_operand" "=Y") + [(set (match_operand:SF 0 "register_operand" "=Y,Y") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "mY")))] - "!TARGET_80387 && TARGET_SSE2" + (match_operand:DF 1 "nonimmediate_operand" "Y,mY")))] + "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" "cvtsd2ss\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,double") + (set_attr "mode" "SF")]) + +(define_insn "*truncdfsf2_sse_only_nooverlap" + [(set (match_operand:SF 0 "register_operand" "=&Y") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY")))] + "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" + "#" + [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) (define_split @@ -3772,100 +3822,79 @@ [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] "") +; Avoid possible reformatting penalty on the destination by first +; zeroing it out (define_split - [(set (match_operand:SF 0 "nonimmediate_operand" "") + [(set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" ""))) (clobber (match_operand 2 "" ""))] "TARGET_80387 && reload_completed - && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:SF 0 "register_operand" "") - (float_truncate:SF - (match_operand:DF 1 "fp_register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:SF (match_dup 1))) - (set (match_dup 0) (match_dup 2))] - "") - -(define_expand "truncxfsf2" - [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") - (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_dup 2))])] - "!TARGET_64BIT && TARGET_80387" - "operands[2] = assign_386_stack_local (SFmode, 0);") - -(define_insn "*truncxfsf2_1" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf") - (float_truncate:SF - (match_operand:XF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] - "!TARGET_64BIT && TARGET_80387" + && SSE_REG_P (operands[0]) + && !STACK_REG_P (operands[1])" + [(const_int 0)] { - switch (which_alternative) + rtx src, dest; + if (!TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS) + emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1])); + else { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort(); + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + /* simplify_gen_subreg refuses to widen memory references. */ + if (GET_CODE (src) == SUBREG) + alter_subreg (&src); + if (reg_overlap_mentioned_p (operands[0], operands[1])) + abort (); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsd2ss (dest, dest, src)); } -} - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "mode" "SF")]) - -(define_insn "*truncxfsf2_2" - [(set (match_operand:SF 0 "memory_operand" "=m") - (float_truncate:SF - (match_operand:XF 1 "register_operand" "f")))] - "!TARGET_64BIT && TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} - [(set_attr "type" "fmov") - (set_attr "mode" "SF")]) + DONE; +}) (define_split - [(set (match_operand:SF 0 "memory_operand" "") + [(set (match_operand:SF 0 "register_operand" "") (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) - (clobber (match_operand:SF 2 "memory_operand" ""))] - "TARGET_80387" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") + (match_operand:DF 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed + && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS_FOR_CVTSD2SS" + [(const_int 0)] +{ + rtx src, dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + /* simplify_gen_subreg refuses to widen memory references. */ + if (GET_CODE (src) == SUBREG) + alter_subreg (&src); + if (reg_overlap_mentioned_p (operands[0], operands[1])) + abort (); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsd2ss (dest, dest, src)); + DONE; +}) (define_split [(set (match_operand:SF 0 "register_operand" "") (float_truncate:SF - (match_operand:XF 1 "register_operand" ""))) + (match_operand:DF 1 "fp_register_operand" ""))) (clobber (match_operand:SF 2 "memory_operand" ""))] "TARGET_80387 && reload_completed" [(set (match_dup 2) (float_truncate:SF (match_dup 1))) (set (match_dup 0) (match_dup 2))] "") -(define_expand "trunctfsf2" +(define_expand "truncxfsf2" [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "") (float_truncate:SF - (match_operand:TF 1 "register_operand" ""))) + (match_operand:XF 1 "register_operand" ""))) (clobber (match_dup 2))])] "TARGET_80387" "operands[2] = assign_386_stack_local (SFmode, 0);") -(define_insn "*trunctfsf2_1" +(define_insn "*truncxfsf2_1" [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf") (float_truncate:SF - (match_operand:TF 1 "register_operand" "f,f,f,f"))) + (match_operand:XF 1 "register_operand" "f,f,f,f"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] "TARGET_80387" { @@ -3883,10 +3912,10 @@ [(set_attr "type" "fmov,multi,multi,multi") (set_attr "mode" "SF")]) -(define_insn "*trunctfsf2_2" +(define_insn "*truncxfsf2_2" [(set (match_operand:SF 0 "memory_operand" "=m") (float_truncate:SF - (match_operand:TF 1 "register_operand" "f")))] + (match_operand:XF 1 "register_operand" "f")))] "TARGET_80387" { if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) @@ -3900,7 +3929,7 @@ (define_split [(set (match_operand:SF 0 "memory_operand" "") (float_truncate:SF - (match_operand:TF 1 "register_operand" ""))) + (match_operand:XF 1 "register_operand" ""))) (clobber (match_operand:SF 2 "memory_operand" ""))] "TARGET_80387" [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] @@ -3909,20 +3938,19 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (float_truncate:SF - (match_operand:TF 1 "register_operand" ""))) + (match_operand:XF 1 "register_operand" ""))) (clobber (match_operand:SF 2 "memory_operand" ""))] "TARGET_80387 && reload_completed" [(set (match_dup 2) (float_truncate:SF (match_dup 1))) (set (match_dup 0) (match_dup 2))] "") - (define_expand "truncxfdf2" [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") (float_truncate:DF (match_operand:XF 1 "register_operand" ""))) (clobber (match_dup 2))])] - "!TARGET_64BIT && TARGET_80387" + "TARGET_80387" "operands[2] = assign_386_stack_local (DFmode, 0);") (define_insn "*truncxfdf2_1" @@ -3930,7 +3958,7 @@ (float_truncate:DF (match_operand:XF 1 "register_operand" "f,f,f,f"))) (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] - "!TARGET_64BIT && TARGET_80387" + "TARGET_80387" { switch (which_alternative) { @@ -3951,7 +3979,7 @@ [(set (match_operand:DF 0 "memory_operand" "=m") (float_truncate:DF (match_operand:XF 1 "register_operand" "f")))] - "!TARGET_64BIT && TARGET_80387" + "TARGET_80387" { if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) return "fstp%z0\t%y0"; @@ -3980,69 +4008,6 @@ (set (match_dup 0) (match_dup 2))] "") -(define_expand "trunctfdf2" - [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "") - (float_truncate:DF - (match_operand:TF 1 "register_operand" ""))) - (clobber (match_dup 2))])] - "TARGET_80387" - "operands[2] = assign_386_stack_local (DFmode, 0);") - -(define_insn "*trunctfdf2_1" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#rY,?r#fY,?Y#rf") - (float_truncate:DF - (match_operand:TF 1 "register_operand" "f,f,f,f"))) - (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] - "TARGET_80387" -{ - switch (which_alternative) - { - case 0: - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; - default: - abort(); - } - abort (); -} - [(set_attr "type" "fmov,multi,multi,multi") - (set_attr "mode" "DF")]) - - (define_insn "*trunctfdf2_2" - [(set (match_operand:DF 0 "memory_operand" "=m") - (float_truncate:DF - (match_operand:TF 1 "register_operand" "f")))] - "TARGET_80387" -{ - if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - return "fstp%z0\t%y0"; - else - return "fst%z0\t%y0"; -} - [(set_attr "type" "fmov") - (set_attr "mode" "DF")]) - -(define_split - [(set (match_operand:DF 0 "memory_operand" "") - (float_truncate:DF - (match_operand:TF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] - "TARGET_80387" - [(set (match_dup 0) (float_truncate:DF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:DF 0 "register_operand" "") - (float_truncate:DF - (match_operand:TF 1 "register_operand" ""))) - (clobber (match_operand:DF 2 "memory_operand" ""))] - "TARGET_80387 && reload_completed" - [(set (match_dup 2) (float_truncate:DF (match_dup 1))) - (set (match_dup 0) (match_dup 2))] - "") - ;; %%% Break up all these bad boys. @@ -4051,12 +4016,6 @@ (define_expand "fix_truncxfdi2" [(set (match_operand:DI 0 "nonimmediate_operand" "") (fix:DI (match_operand:XF 1 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "fix_trunctfdi2" - [(set (match_operand:DI 0 "nonimmediate_operand" "") - (fix:DI (match_operand:TF 1 "register_operand" "")))] "TARGET_80387" "") @@ -4102,6 +4061,7 @@ "&& 1" [(const_int 0)] { + ix86_optimize_mode_switching = 1; operands[2] = assign_386_stack_local (HImode, 1); operands[3] = assign_386_stack_local (HImode, 2); if (memory_operand (operands[0], VOIDmode)) @@ -4116,7 +4076,8 @@ } DONE; } - [(set_attr "type" "fistp")]) + [(set_attr "type" "fistp") + (set_attr "mode" "DI")]) (define_insn "fix_truncdi_nomemory" [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") @@ -4128,7 +4089,8 @@ "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" "#" - [(set_attr "type" "fistp")]) + [(set_attr "type" "fistp") + (set_attr "mode" "DI")]) (define_insn "fix_truncdi_memory" [(set (match_operand:DI 0 "memory_operand" "=m") @@ -4139,7 +4101,8 @@ "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)" "* operands[5] = operands[4]; return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp")]) + [(set_attr "type" "fistp") + (set_attr "mode" "DI")]) (define_split [(set (match_operand:DI 0 "register_operand" "") @@ -4172,30 +4135,48 @@ ;; When SSE available, it is always faster to use it! (define_insn "fix_truncsfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r") - (fix:DI (match_operand:SF 1 "nonimmediate_operand" "xm")))] + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] "TARGET_64BIT && TARGET_SSE" "cvttss2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt")]) + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double,vector")]) + +;; Avoid vector decoded form of the instruction. +(define_peephole2 + [(match_scratch:SF 2 "x") + (set (match_operand:DI 0 "register_operand" "") + (fix:DI (match_operand:SF 1 "memory_operand" "")))] + "TARGET_K8 && !optimize_size" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:DI (match_dup 2)))] + "") (define_insn "fix_truncdfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r") - (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))] "TARGET_64BIT && TARGET_SSE2" "cvttsd2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt")]) + [(set_attr "type" "sseicvt,sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +;; Avoid vector decoded form of the instruction. +(define_peephole2 + [(match_scratch:DF 2 "Y") + (set (match_operand:DI 0 "register_operand" "") + (fix:DI (match_operand:DF 1 "memory_operand" "")))] + "TARGET_K8 && !optimize_size" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:DI (match_dup 2)))] + "") ;; Signed conversion to SImode. (define_expand "fix_truncxfsi2" [(set (match_operand:SI 0 "nonimmediate_operand" "") (fix:SI (match_operand:XF 1 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "fix_trunctfsi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (fix:SI (match_operand:TF 1 "register_operand" "")))] "TARGET_80387" "") @@ -4241,6 +4222,7 @@ "&& 1" [(const_int 0)] { + ix86_optimize_mode_switching = 1; operands[2] = assign_386_stack_local (HImode, 1); operands[3] = assign_386_stack_local (HImode, 2); if (memory_operand (operands[0], VOIDmode)) @@ -4255,7 +4237,8 @@ } DONE; } - [(set_attr "type" "fistp")]) + [(set_attr "type" "fistp") + (set_attr "mode" "SI")]) (define_insn "fix_truncsi_nomemory" [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r") @@ -4266,7 +4249,8 @@ "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" "#" - [(set_attr "type" "fistp")]) + [(set_attr "type" "fistp") + (set_attr "mode" "SI")]) (define_insn "fix_truncsi_memory" [(set (match_operand:SI 0 "memory_operand" "=m") @@ -4276,22 +4260,47 @@ "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp")]) + [(set_attr "type" "fistp") + (set_attr "mode" "SI")]) ;; When SSE available, it is always faster to use it! (define_insn "fix_truncsfsi_sse" - [(set (match_operand:SI 0 "register_operand" "=r") - (fix:SI (match_operand:SF 1 "nonimmediate_operand" "xm")))] + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] "TARGET_SSE" "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt")]) + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +;; Avoid vector decoded form of the instruction. +(define_peephole2 + [(match_scratch:SF 2 "x") + (set (match_operand:SI 0 "register_operand" "") + (fix:SI (match_operand:SF 1 "memory_operand" "")))] + "TARGET_K8 && !optimize_size" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SI (match_dup 2)))] + "") (define_insn "fix_truncdfsi_sse" - [(set (match_operand:SI 0 "register_operand" "=r") - (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Ym")))] + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))] "TARGET_SSE2" "cvttsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt")]) + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,vector")]) + +;; Avoid vector decoded form of the instruction. +(define_peephole2 + [(match_scratch:DF 2 "Y") + (set (match_operand:SI 0 "register_operand" "") + (fix:SI (match_operand:DF 1 "memory_operand" "")))] + "TARGET_K8 && !optimize_size" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SI (match_dup 2)))] + "") (define_split [(set (match_operand:SI 0 "register_operand" "") @@ -4323,12 +4332,6 @@ (define_expand "fix_truncxfhi2" [(set (match_operand:HI 0 "nonimmediate_operand" "") (fix:HI (match_operand:XF 1 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "fix_trunctfhi2" - [(set (match_operand:HI 0 "nonimmediate_operand" "") - (fix:HI (match_operand:TF 1 "register_operand" "")))] "TARGET_80387" "") @@ -4356,6 +4359,7 @@ "" [(const_int 0)] { + ix86_optimize_mode_switching = 1; operands[2] = assign_386_stack_local (HImode, 1); operands[3] = assign_386_stack_local (HImode, 2); if (memory_operand (operands[0], VOIDmode)) @@ -4370,7 +4374,8 @@ } DONE; } - [(set_attr "type" "fistp")]) + [(set_attr "type" "fistp") + (set_attr "mode" "HI")]) (define_insn "fix_trunchi_nomemory" [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r") @@ -4381,7 +4386,8 @@ "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" "#" - [(set_attr "type" "fistp")]) + [(set_attr "type" "fistp") + (set_attr "mode" "HI")]) (define_insn "fix_trunchi_memory" [(set (match_operand:HI 0 "memory_operand" "=m") @@ -4391,7 +4397,8 @@ "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1])) && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" "* return output_fix_trunc (insn, operands);" - [(set_attr "type" "fistp")]) + [(set_attr "type" "fistp") + (set_attr "mode" "HI")]) (define_split [(set (match_operand:HI 0 "memory_operand" "") @@ -4446,10 +4453,23 @@ ;; Even though we only accept memory inputs, the backend _really_ ;; wants to be able to do this between registers. -(define_insn "floathisf2" +(define_expand "floathisf2" + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_SSE || TARGET_80387" +{ + if (TARGET_SSE && TARGET_SSE_MATH) + { + emit_insn (gen_floatsisf2 (operands[0], + convert_to_mode (SImode, operands[1], 0))); + DONE; + } +}) + +(define_insn "*floathisf2_1" [(set (match_operand:SF 0 "register_operand" "=f,f") (float:SF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] - "TARGET_80387 && !TARGET_SSE" + "TARGET_80387 && (!TARGET_SSE || !TARGET_SSE_MATH)" "@ fild%z1\t%1 #" @@ -4464,26 +4484,45 @@ "") (define_insn "*floatsisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))] + [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] "TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)" "@ fild%z1\t%1 # + cvtsi2ss\t{%1, %0|%0, %1} cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,ssecvt") + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") (set_attr "mode" "SF") + (set_attr "athlon_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatsisf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (float:SF (match_operand:SI 1 "nonimmediate_operand" "mr")))] + [(set (match_operand:SF 0 "register_operand" "=x,x") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))] "TARGET_SSE" "cvtsi2ss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "sseicvt") (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,double") (set_attr "fp_int_src" "true")]) +; Avoid possible reformatting penalty on the destination by first +; zeroing it out +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsi2ss (dest, dest, operands[1])); + DONE; +}) + (define_expand "floatdisf2" [(set (match_operand:SF 0 "register_operand" "") (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] @@ -4502,30 +4541,62 @@ (set_attr "fp_int_src" "true")]) (define_insn "*floatdisf2_i387" - [(set (match_operand:SF 0 "register_operand" "=f,?f,x") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))] + [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] "TARGET_64BIT && TARGET_80387 && (!TARGET_SSE || TARGET_MIX_SSE_I387)" "@ fild%z1\t%1 # + cvtsi2ss{q}\t{%1, %0|%0, %1} cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,ssecvt") + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") (set_attr "mode" "SF") + (set_attr "athlon_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatdisf2_sse" - [(set (match_operand:SF 0 "register_operand" "=x") - (float:SF (match_operand:DI 1 "nonimmediate_operand" "mr")))] + [(set (match_operand:SF 0 "register_operand" "=x,x") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))] "TARGET_64BIT && TARGET_SSE" "cvtsi2ss{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "sseicvt") (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,double") (set_attr "fp_int_src" "true")]) -(define_insn "floathidf2" +; Avoid possible reformatting penalty on the destination by first +; zeroing it out +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsi2ssq (dest, dest, operands[1])); + DONE; +}) + +(define_expand "floathidf2" + [(set (match_operand:DF 0 "register_operand" "") + (float:DF (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_SSE2 || TARGET_80387" +{ + if (TARGET_SSE && TARGET_SSE_MATH) + { + emit_insn (gen_floatsidf2 (operands[0], + convert_to_mode (SImode, operands[1], 0))); + DONE; + } +}) + +(define_insn "*floathidf2_1" [(set (match_operand:DF 0 "register_operand" "=f,f") (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] - "TARGET_80387 && !TARGET_SSE2" + "TARGET_80387 && (!TARGET_SSE2 || !TARGET_SSE_MATH)" "@ fild%z1\t%1 #" @@ -4540,24 +4611,27 @@ "") (define_insn "*floatsidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,?f,Y") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,mr")))] + [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))] "TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)" "@ fild%z1\t%1 # + cvtsi2sd\t{%1, %0|%0, %1} cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,ssecvt") + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") (set_attr "mode" "DF") + (set_attr "athlon_decode" "*,*,double,direct") (set_attr "fp_int_src" "true")]) (define_insn "*floatsidf2_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") - (float:DF (match_operand:SI 1 "nonimmediate_operand" "mr")))] + [(set (match_operand:DF 0 "register_operand" "=Y,Y") + (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))] "TARGET_SSE2" "cvtsi2sd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "sseicvt") (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct") (set_attr "fp_int_src" "true")]) (define_expand "floatdidf2" @@ -4578,40 +4652,32 @@ (set_attr "fp_int_src" "true")]) (define_insn "*floatdidf2_i387" - [(set (match_operand:DF 0 "register_operand" "=f,?f,Y") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,mr")))] + [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f") + (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))] "TARGET_64BIT && TARGET_80387 && (!TARGET_SSE2 || TARGET_MIX_SSE_I387)" "@ fild%z1\t%1 # + cvtsi2sd{q}\t{%1, %0|%0, %1} cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "fmov,multi,ssecvt") + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") (set_attr "mode" "DF") + (set_attr "athlon_decode" "*,*,double,direct") (set_attr "fp_int_src" "true")]) (define_insn "*floatdidf2_sse" - [(set (match_operand:DF 0 "register_operand" "=Y") - (float:DF (match_operand:DI 1 "nonimmediate_operand" "mr")))] + [(set (match_operand:DF 0 "register_operand" "=Y,Y") + (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))] "TARGET_SSE2" "cvtsi2sd{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "sseicvt") (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct") (set_attr "fp_int_src" "true")]) (define_insn "floathixf2" [(set (match_operand:XF 0 "register_operand" "=f,f") (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] - "!TARGET_64BIT && TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") - (set_attr "fp_int_src" "true")]) - -(define_insn "floathitf2" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (float:TF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] "TARGET_80387" "@ fild%z1\t%1 @@ -4623,17 +4689,6 @@ (define_insn "floatsixf2" [(set (match_operand:XF 0 "register_operand" "=f,f") (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,r")))] - "!TARGET_64BIT && TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") - (set_attr "fp_int_src" "true")]) - -(define_insn "floatsitf2" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (float:TF (match_operand:SI 1 "nonimmediate_operand" "m,r")))] "TARGET_80387" "@ fild%z1\t%1 @@ -4645,17 +4700,6 @@ (define_insn "floatdixf2" [(set (match_operand:XF 0 "register_operand" "=f,f") (float:XF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] - "!TARGET_64BIT && TARGET_80387" - "@ - fild%z1\t%1 - #" - [(set_attr "type" "fmov,multi") - (set_attr "mode" "XF") - (set_attr "fp_int_src" "true")]) - -(define_insn "floatditf2" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (float:TF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] "TARGET_80387" "@ fild%z1\t%1 @@ -4677,6 +4721,189 @@ ix86_free_from_memory (GET_MODE (operands[1])); DONE; }) + +(define_expand "floatunssisf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" ""))] + "TARGET_SSE && TARGET_SSE_MATH && !TARGET_64BIT" + "x86_emit_floatuns (operands); DONE;") + +(define_expand "floatunsdisf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" ""))] + "TARGET_SSE && TARGET_SSE_MATH && TARGET_64BIT" + "x86_emit_floatuns (operands); DONE;") + +(define_expand "floatunsdidf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_64BIT" + "x86_emit_floatuns (operands); DONE;") + +;; SSE extract/set expanders + +(define_expand "vec_setv2df" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE2" +{ + switch (INTVAL (operands[2])) + { + case 0: + emit_insn (gen_sse2_movsd (operands[0], operands[0], + simplify_gen_subreg (V2DFmode, operands[1], + DFmode, 0))); + break; + case 1: + { + rtx op1 = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + + emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], op1)); + } + break; + default: + abort (); + } + DONE; +}) + +(define_expand "vec_extractv2df" + [(match_operand:DF 0 "register_operand" "") + (match_operand:V2DF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE2" +{ + switch (INTVAL (operands[2])) + { + case 0: + emit_move_insn (operands[0], gen_lowpart (DFmode, operands[1])); + break; + case 1: + { + rtx dest = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + + emit_insn (gen_sse2_unpckhpd (dest, operands[1], operands[1])); + } + break; + default: + abort (); + } + DONE; +}) + +(define_expand "vec_initv2df" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE2" +{ + ix86_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_setv4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + switch (INTVAL (operands[2])) + { + case 0: + emit_insn (gen_sse_movss (operands[0], operands[0], + simplify_gen_subreg (V4SFmode, operands[1], + SFmode, 0))); + break; + case 1: + { + rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + rtx tmp = gen_reg_rtx (V4SFmode); + + emit_move_insn (tmp, operands[0]); + emit_insn (gen_sse_unpcklps (operands[0], operands[0], operands[0])); + emit_insn (gen_sse_movss (operands[0], operands[0], op1)); + emit_insn (gen_sse_shufps (operands[0], operands[0], tmp, + GEN_INT (1 + (0<<2) + (2<<4) + (3<<6)))); + } + case 2: + { + rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + rtx tmp = gen_reg_rtx (V4SFmode); + + emit_move_insn (tmp, operands[0]); + emit_insn (gen_sse_movss (tmp, tmp, op1)); + emit_insn (gen_sse_shufps (operands[0], operands[0], tmp, + GEN_INT (0 + (1<<2) + (0<<4) + (3<<6)))); + } + break; + case 3: + { + rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + rtx tmp = gen_reg_rtx (V4SFmode); + + emit_move_insn (tmp, operands[0]); + emit_insn (gen_sse_movss (tmp, tmp, op1)); + emit_insn (gen_sse_shufps (operands[0], operands[0], tmp, + GEN_INT (0 + (1<<2) + (2<<4) + (0<<6)))); + } + break; + default: + abort (); + } + DONE; +}) + +(define_expand "vec_extractv4sf" + [(match_operand:SF 0 "register_operand" "") + (match_operand:V4SF 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "TARGET_SSE" +{ + switch (INTVAL (operands[2])) + { + case 0: + emit_move_insn (operands[0], gen_lowpart (SFmode, operands[1])); + break; + case 1: + { + rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + rtx tmp = gen_reg_rtx (V4SFmode); + + emit_move_insn (tmp, operands[1]); + emit_insn (gen_sse_shufps (op0, tmp, tmp, + GEN_INT (1))); + } + case 2: + { + rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + rtx tmp = gen_reg_rtx (V4SFmode); + + emit_move_insn (tmp, operands[1]); + emit_insn (gen_sse_unpckhps (op0, tmp, tmp)); + } + case 3: + { + rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0); + rtx tmp = gen_reg_rtx (V4SFmode); + + emit_move_insn (tmp, operands[1]); + emit_insn (gen_sse_shufps (op0, tmp, tmp, + GEN_INT (3))); + } + default: + abort (); + } + DONE; +}) + +(define_expand "vec_initv4sf" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SSE" +{ + ix86_expand_vector_init (operands[0], operands[1]); + DONE; +}) ;; Add instructions @@ -4719,9 +4946,9 @@ split_di (operands+1, 1, operands+1, operands+4); split_di (operands+2, 1, operands+2, operands+5);") -(define_insn "*adddi3_carry_rex64" +(define_insn "adddi3_carry_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") - (plus:DI (plus:DI (ltu:DI (reg:CC 17) (const_int 0)) + (plus:DI (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "") (match_operand:DI 1 "nonimmediate_operand" "%0,0")) (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) (clobber (reg:CC 17))] @@ -4744,9 +4971,35 @@ [(set_attr "type" "alu") (set_attr "mode" "DI")]) -(define_insn "*addsi3_carry" +(define_insn "addqi3_carry" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") + (match_operand:QI 1 "nonimmediate_operand" "%0,0")) + (match_operand:QI 2 "general_operand" "qi,qm"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (PLUS, QImode, operands)" + "adc{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "QI") + (set_attr "ppro_uops" "few")]) + +(define_insn "addhi3_carry" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (plus:HI (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "") + (match_operand:HI 1 "nonimmediate_operand" "%0,0")) + (match_operand:HI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (PLUS, HImode, operands)" + "adc{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "HI") + (set_attr "ppro_uops" "few")]) + +(define_insn "addsi3_carry" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") - (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") (match_operand:SI 1 "nonimmediate_operand" "%0,0")) (match_operand:SI 2 "general_operand" "ri,rm"))) (clobber (reg:CC 17))] @@ -4760,7 +5013,7 @@ (define_insn "*addsi3_carry_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (plus:SI (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") (match_operand:SI 1 "nonimmediate_operand" "%0")) (match_operand:SI 2 "general_operand" "rim")))) (clobber (reg:CC 17))] @@ -4805,7 +5058,7 @@ (define_insn "*lea_1" [(set (match_operand:SI 0 "register_operand" "=r") - (match_operand:SI 1 "address_operand" "p"))] + (match_operand:SI 1 "no_seg_address_operand" "p"))] "!TARGET_64BIT" "lea{l}\t{%a1, %0|%0, %a1}" [(set_attr "type" "lea") @@ -4813,7 +5066,7 @@ (define_insn "*lea_1_rex64" [(set (match_operand:SI 0 "register_operand" "=r") - (subreg:SI (match_operand:DI 1 "address_operand" "p") 0))] + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))] "TARGET_64BIT" "lea{l}\t{%a1, %0|%0, %a1}" [(set_attr "type" "lea") @@ -4821,7 +5074,8 @@ (define_insn "*lea_1_zext" [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (subreg:SI (match_operand:DI 1 "address_operand" "p") 0)))] + (zero_extend:DI + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))] "TARGET_64BIT" "lea{l}\t{%a1, %k0|%k0, %a1}" [(set_attr "type" "lea") @@ -4829,7 +5083,7 @@ (define_insn "*lea_2_rex64" [(set (match_operand:DI 0 "register_operand" "=r") - (match_operand:DI 1 "address_operand" "p"))] + (match_operand:DI 1 "no_seg_address_operand" "p"))] "TARGET_64BIT" "lea{q}\t{%a1, %0|%0, %a1}" [(set_attr "type" "lea") @@ -5093,7 +5347,7 @@ if (! rtx_equal_p (operands[0], operands[1])) abort (); /* ???? We ought to handle there the 32bit case too - - do we need new constrant? */ + - do we need new constraint? */ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if (GET_CODE (operands[2]) == CONST_INT @@ -5143,7 +5397,7 @@ if (! rtx_equal_p (operands[0], operands[1])) abort (); /* ???? We ought to handle there the 32bit case too - - do we need new constrant? */ + - do we need new constraint? */ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. Exceptions: -128 encodes smaller than 128, so swap sign and op. */ if (GET_CODE (operands[2]) == CONST_INT @@ -5403,7 +5657,7 @@ (plus:SI (match_operand:SI 1 "register_operand" "") (match_operand:SI 2 "nonmemory_operand" "")))) (clobber (reg:CC 17))] - "reload_completed + "TARGET_64BIT && reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" [(set (match_dup 0) (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))] @@ -5593,7 +5847,7 @@ (const_string "alu"))) (set_attr "mode" "SI")]) -; For comparisons agains 1, -1 and 128, we may generate better code +; For comparisons against 1, -1 and 128, we may generate better code ; by converting cmp to add, inc or dec as done by peephole2. This pattern ; is matched then. We can't accept general immediate, because for ; case of overflows, the result is messed up. @@ -6053,7 +6307,7 @@ if (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) < 0) { - operands[2] = GEN_INT (-INTVAL (operands[2])); + operands[1] = GEN_INT (-INTVAL (operands[1])); return "sub{b}\t{%1, %0|%0, %1}"; } return "add{b}\t{%1, %0|%0, %1}"; @@ -6311,13 +6565,6 @@ [(set (match_operand:XF 0 "register_operand" "") (plus:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "addtf3" - [(set (match_operand:TF 0 "register_operand" "") - (plus:TF (match_operand:TF 1 "register_operand" "") - (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") @@ -6375,7 +6622,7 @@ (define_insn "subdi3_carry_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") - (plus:DI (ltu:DI (reg:CC 17) (const_int 0)) + (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "") (match_operand:DI 2 "x86_64_general_operand" "re,rm")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" @@ -6421,11 +6668,36 @@ [(set_attr "type" "alu") (set_attr "mode" "DI")]) +(define_insn "subqi3_carry" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") + (match_operand:QI 2 "general_operand" "qi,qm")))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (MINUS, QImode, operands)" + "sbb{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "ppro_uops" "few") + (set_attr "mode" "QI")]) + +(define_insn "subhi3_carry" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "") + (match_operand:HI 2 "general_operand" "ri,rm")))) + (clobber (reg:CC 17))] + "ix86_binary_operator_ok (MINUS, HImode, operands)" + "sbb{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "ppro_uops" "few") + (set_attr "mode" "HI")]) (define_insn "subsi3_carry" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") - (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") (match_operand:SI 2 "general_operand" "ri,rm")))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (MINUS, SImode, operands)" @@ -6439,7 +6711,7 @@ [(set (match_operand:DI 0 "register_operand" "=rm,r") (zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "0,0") - (plus:SI (ltu:SI (reg:CC 17) (const_int 0)) + (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") (match_operand:SI 2 "general_operand" "ri,rm"))))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" @@ -6522,7 +6794,7 @@ (define_insn "*subsi_3_zext" [(set (reg 17) - (compare (match_operand:SI 1 "nonimmediate_operand" "0") + (compare (match_operand:SI 1 "register_operand" "0") (match_operand:SI 2 "general_operand" "rim"))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI @@ -6639,13 +6911,6 @@ [(set (match_operand:XF 0 "register_operand" "") (minus:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "subtf3" - [(set (match_operand:TF 0 "register_operand" "") - (minus:TF (match_operand:TF 1 "register_operand" "") - (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") @@ -6675,7 +6940,7 @@ (define_insn "*muldi3_1_rex64" [(set (match_operand:DI 0 "register_operand" "=r,r,r") - (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,0,0") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0") (match_operand:DI 2 "x86_64_general_operand" "K,e,mr"))) (clobber (reg:CC 17))] "TARGET_64BIT @@ -6686,6 +6951,15 @@ imul{q}\t{%2, %0|%0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "DI")]) (define_expand "mulsi3" @@ -6698,56 +6972,50 @@ (define_insn "*mulsi3_1" [(set (match_operand:SI 0 "register_operand" "=r,r,r") - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,0,0") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") (match_operand:SI 2 "general_operand" "K,i,mr"))) (clobber (reg:CC 17))] "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" - ; For the {r,0,i} alternative (i.e., register <- register * immediate), - ; there are two ways of writing the exact same machine instruction - ; in assembly language. One, for example, is: - ; - ; imul $12, %eax - ; - ; while the other is: - ; - ; imul $12, %eax, %eax - ; - ; The first is simply short-hand for the latter. But, some assemblers, - ; like the SCO OSR5 COFF assembler, don't handle the first form. "@ imul{l}\t{%2, %1, %0|%0, %1, %2} imul{l}\t{%2, %1, %0|%0, %1, %2} imul{l}\t{%2, %0|%0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "SI")]) (define_insn "*mulsi3_1_zext" [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,0,0") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") (match_operand:SI 2 "general_operand" "K,i,mr")))) (clobber (reg:CC 17))] "TARGET_64BIT && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" - ; For the {r,0,i} alternative (i.e., register <- register * immediate), - ; there are two ways of writing the exact same machine instruction - ; in assembly language. One, for example, is: - ; - ; imul $12, %eax - ; - ; while the other is: - ; - ; imul $12, %eax, %eax - ; - ; The first is simply short-hand for the latter. But, some assemblers, - ; like the SCO OSR5 COFF assembler, don't handle the first form. "@ imul{l}\t{%2, %1, %k0|%k0, %1, %2} imul{l}\t{%2, %1, %k0|%k0, %1, %2} imul{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "SI")]) (define_expand "mulhi3" @@ -6760,18 +7028,22 @@ (define_insn "*mulhi3_1" [(set (match_operand:HI 0 "register_operand" "=r,r,r") - (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,0,0") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0") (match_operand:HI 2 "general_operand" "K,i,mr"))) (clobber (reg:CC 17))] "GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM" - ; %%% There was a note about "Assembler has weird restrictions", - ; concerning alternative 1 when op1 == op0. True? "@ imul{w}\t{%2, %1, %0|%0, %1, %2} imul{w}\t{%2, %1, %0|%0, %1, %2} imul{w}\t{%2, %0|%0, %2}" [(set_attr "type" "imul") (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1,2") + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "HI")]) (define_expand "mulqi3" @@ -6792,6 +7064,10 @@ "mul{b}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) (set_attr "mode" "QI")]) (define_expand "umulqihi3" @@ -6814,6 +7090,10 @@ "mul{b}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) (set_attr "mode" "QI")]) (define_expand "mulqihi3" @@ -6834,6 +7114,10 @@ "imul{b}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) (set_attr "mode" "QI")]) (define_expand "umulditi3" @@ -6857,6 +7141,10 @@ [(set_attr "type" "imul") (set_attr "ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "DI")]) ;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers @@ -6881,6 +7169,10 @@ [(set_attr "type" "imul") (set_attr "ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "SI")]) (define_expand "mulditi3" @@ -6903,6 +7195,10 @@ "imul{q}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "DI")]) (define_expand "mulsidi3" @@ -6925,6 +7221,10 @@ "imul{l}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "SI")]) (define_expand "umuldi3_highpart" @@ -6958,6 +7258,10 @@ [(set_attr "type" "imul") (set_attr "ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "DI")]) (define_expand "umulsi3_highpart" @@ -6990,6 +7294,10 @@ [(set_attr "type" "imul") (set_attr "ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "SI")]) (define_insn "*umulsi3_highpart_zext" @@ -7009,6 +7317,10 @@ [(set_attr "type" "imul") (set_attr "ppro_uops" "few") (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "SI")]) (define_expand "smuldi3_highpart" @@ -7041,6 +7353,10 @@ "imul{q}\t%2" [(set_attr "type" "imul") (set_attr "ppro_uops" "few") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "DI")]) (define_expand "smulsi3_highpart" @@ -7072,6 +7388,10 @@ "imul{l}\t%2" [(set_attr "type" "imul") (set_attr "ppro_uops" "few") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "SI")]) (define_insn "*smulsi3_highpart_zext" @@ -7090,6 +7410,10 @@ "imul{l}\t%2" [(set_attr "type" "imul") (set_attr "ppro_uops" "few") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) (set_attr "mode" "SI")]) ;; The patterns that match these are at the end of this file. @@ -7098,13 +7422,6 @@ [(set (match_operand:XF 0 "register_operand" "") (mult:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "multf3" - [(set (match_operand:TF 0 "register_operand" "") - (mult:TF (match_operand:TF 1 "register_operand" "") - (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") @@ -7152,13 +7469,6 @@ [(set (match_operand:XF 0 "register_operand" "") (div:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" - "") - -(define_expand "divtf3" - [(set (match_operand:TF 0 "register_operand" "") - (div:TF (match_operand:TF 1 "register_operand" "") - (match_operand:TF 2 "register_operand" "")))] "TARGET_80387" "") @@ -7189,7 +7499,7 @@ "") ;; Allow to come the parameter in eax or edx to avoid extra moves. -;; Penalize eax case sligthly because it results in worse scheduling +;; Penalize eax case slightly because it results in worse scheduling ;; of code. (define_insn "*divmoddi4_nocltd_rex64" [(set (match_operand:DI 0 "register_operand" "=&a,?a") @@ -7274,7 +7584,7 @@ "") ;; Allow to come the parameter in eax or edx to avoid extra moves. -;; Penalize eax case sligthly because it results in worse scheduling +;; Penalize eax case slightly because it results in worse scheduling ;; of code. (define_insn "*divmodsi4_nocltd" [(set (match_operand:SI 0 "register_operand" "=&a,?a") @@ -7502,10 +7812,11 @@ (define_insn "*testdi_1_rex64" [(set (reg 17) (compare - (and:DI (match_operand:DI 0 "nonimmediate_operand" "%*a,r,*a,r,rm") - (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,Z,e,e,re")) + (and:DI (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm") + (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re")) (const_int 0)))] - "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ test{l}\t{%k1, %k0|%k0, %k1} test{l}\t{%k1, %k0|%k0, %k1} @@ -7520,10 +7831,11 @@ (define_insn "testsi_1" [(set (reg 17) (compare - (and:SI (match_operand:SI 0 "nonimmediate_operand" "%*a,r,rm") - (match_operand:SI 1 "nonmemory_operand" "in,in,rin")) + (and:SI (match_operand:SI 0 "nonimmediate_operand" "%!*a,r,rm") + (match_operand:SI 1 "general_operand" "in,in,rin")) (const_int 0)))] - "ix86_match_ccmode (insn, CCNOmode)" + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "test{l}\t{%1, %0|%0, %1}" [(set_attr "type" "test") (set_attr "modrm" "0,1,1") @@ -7541,10 +7853,11 @@ (define_insn "*testhi_1" [(set (reg 17) - (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%*a,r,rm") - (match_operand:HI 1 "nonmemory_operand" "n,n,rn")) + (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%!*a,r,rm") + (match_operand:HI 1 "general_operand" "n,n,rn")) (const_int 0)))] - "ix86_match_ccmode (insn, CCNOmode)" + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "test{w}\t{%1, %0|%0, %1}" [(set_attr "type" "test") (set_attr "modrm" "0,1,1") @@ -7561,10 +7874,11 @@ (define_insn "*testqi_1" [(set (reg 17) - (compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "%*a,q,qm,r") - (match_operand:QI 1 "nonmemory_operand" "n,n,qn,n")) + (compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r") + (match_operand:QI 1 "general_operand" "n,n,qn,n")) (const_int 0)))] - "ix86_match_ccmode (insn, CCNOmode)" + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { if (which_alternative == 3) { @@ -7619,9 +7933,10 @@ (const_int 8) (const_int 8)) (zero_extend:SI - (match_operand:QI 1 "nonimmediate_operand" "Qm"))) + (match_operand:QI 1 "general_operand" "Qm"))) (const_int 0)))] - "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "test{b}\t{%1, %h0|%h0, %1}" [(set_attr "type" "test") (set_attr "mode" "QI")]) @@ -7747,7 +8062,7 @@ ;; Convert HImode/SImode test instructions with immediate to QImode ones. ;; i386 does not allow to encode test with 8bit sign extended immediate, so ;; this is relatively important trick. -;; Do the converison only post-reload to avoid limiting of the register class +;; Do the conversion only post-reload to avoid limiting of the register class ;; to QI regs. (define_split [(set (reg 17) @@ -8211,7 +8526,7 @@ ;; Convert wide AND instructions with immediate operand to shorter QImode ;; equivalents when possible. -;; Don't do the splitting with memory operands, since it intoduces risc +;; Don't do the splitting with memory operands, since it introduces risk ;; of memory mismatch stalls. We may want to do the splitting for optimizing ;; for size, but that can (should?) be handled by generic code instead. (define_split @@ -9265,12 +9580,15 @@ in register. */ rtx reg = gen_reg_rtx (SFmode); rtx dest = operands[0]; + rtx imm = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode)); operands[1] = force_reg (SFmode, operands[1]); operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - gen_int_mode (0x80000000, SImode))); + reg = force_reg (V4SFmode, + gen_rtx_CONST_VECTOR (V4SFmode, + gen_rtvec (4, imm, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), + CONST0_RTX (SFmode)))); emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9289,7 +9607,7 @@ (define_insn "negsf2_ifs" [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x")) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r")) (clobber (reg:CC 17))] "TARGET_SSE && (reload_in_progress || reload_completed @@ -9310,7 +9628,7 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9320,13 +9638,15 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) + (use (match_operand:V4SF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (xor:TI (subreg:TI (match_dup 1) 0) - (subreg:TI (match_dup 2) 0)))] + (xor:TI (match_dup 1) + (match_dup 2)))] { + operands[1] = simplify_gen_subreg (TImode, operands[1], SFmode, 0); + operands[2] = simplify_gen_subreg (TImode, operands[2], V4SFmode, 0); if (operands_match_p (operands[0], operands[2])) { rtx tmp; @@ -9365,7 +9685,7 @@ [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = gen_int_mode (0x80000000, SImode); - operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") + operands[0] = gen_lowpart (SImode, operands[0]);") (define_split [(set (match_operand 0 "memory_operand" "") @@ -9377,8 +9697,7 @@ { int size = GET_MODE_SIZE (GET_MODE (operands[1])); - /* XFmode's size is 12, TFmode 16, but only 10 bytes are used. */ - if (size >= 12) + if (GET_MODE (operands[1]) == XFmode) size = 10; operands[0] = adjust_address (operands[0], QImode, size - 1); operands[1] = gen_int_mode (0x80, QImode); @@ -9399,7 +9718,7 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (DFmode); + rtx reg; #if HOST_BITS_PER_WIDE_INT >= 64 rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); #else @@ -9409,7 +9728,10 @@ operands[1] = force_reg (DFmode, operands[1]); operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); + imm = gen_lowpart (DFmode, imm); + reg = force_reg (V2DFmode, + gen_rtx_CONST_VECTOR (V2DFmode, + gen_rtvec (2, imm, CONST0_RTX (DFmode)))); emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9428,7 +9750,7 @@ (define_insn "negdf2_ifs" [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf") (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y")) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r")) (clobber (reg:CC 17))] "!TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9438,8 +9760,8 @@ (define_insn "*negdf2_ifs_rex64" [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0"))) - (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r")) + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9450,7 +9772,7 @@ (define_split [(set (match_operand:DF 0 "memory_operand" "") (neg:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9460,7 +9782,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0]) && (!TARGET_64BIT || FP_REG_P (operands[0]))" @@ -9471,7 +9793,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9484,13 +9806,19 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) + (use (match_operand:V2DF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (xor:TI (subreg:TI (match_dup 1) 0) - (subreg:TI (match_dup 2) 0)))] + (xor:TI (match_dup 1) + (match_dup 2)))] { + operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0); + operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0); + /* Avoid possible reformatting on the operands. */ + if (TARGET_SSE_PARTIAL_REGS && !optimize_size) + emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0])); if (operands_match_p (operands[0], operands[2])) { rtx tmp; @@ -9546,15 +9874,8 @@ [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") (neg:XF (match_operand:XF 1 "nonimmediate_operand" ""))) (clobber (reg:CC 17))])] - "!TARGET_64BIT && TARGET_80387" - "ix86_expand_unary_operator (NEG, XFmode, operands); DONE;") - -(define_expand "negtf2" - [(parallel [(set (match_operand:TF 0 "nonimmediate_operand" "") - (neg:TF (match_operand:TF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] "TARGET_80387" - "ix86_expand_unary_operator (NEG, TFmode, operands); DONE;") + "ix86_expand_unary_operator (NEG, XFmode, operands); DONE;") ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems ;; because of secondary memory needed to reload from class FLOAT_INT_REGS @@ -9563,7 +9884,7 @@ [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f") (neg:XF (match_operand:XF 1 "nonimmediate_operand" "0,0"))) (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 + "TARGET_80387 && ix86_unary_operator_ok (NEG, XFmode, operands)" "#") @@ -9587,37 +9908,7 @@ operands[0] = gen_rtx_REG (SImode, true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));") -;; Keep 'f' and 'r' in separate alternatives to avoid reload problems -;; because of secondary memory needed to reload from class FLOAT_INT_REGS -;; to itself. -(define_insn "*negtf2_if" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f#r,rm#f") - (neg:TF (match_operand:TF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "TARGET_80387 && ix86_unary_operator_ok (NEG, TFmode, operands)" - "#") - -(define_split - [(set (match_operand:TF 0 "fp_register_operand" "") - (neg:TF (match_operand:TF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (neg:TF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:TF 0 "register_and_not_fp_reg_operand" "") - (neg:TF (match_operand:TF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed" - [(parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (0x8000); - operands[0] = gen_rtx_REG (SImode, - true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));") - -;; Conditionize these after reload. If they matches before reload, we +;; Conditionalize these after reload. If they matches before reload, we ;; lose the clobber and ability to use integer instructions. (define_insn "*negsf2_1" @@ -9651,7 +9942,7 @@ (define_insn "*negxf2_1" [(set (match_operand:XF 0 "register_operand" "=f") (neg:XF (match_operand:XF 1 "register_operand" "0")))] - "!TARGET_64BIT && TARGET_80387 && reload_completed" + "TARGET_80387 && reload_completed" "fchs" [(set_attr "type" "fsgn") (set_attr "mode" "XF") @@ -9661,7 +9952,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (neg:XF (float_extend:XF (match_operand:DF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" + "TARGET_80387" "fchs" [(set_attr "type" "fsgn") (set_attr "mode" "XF") @@ -9671,35 +9962,6 @@ [(set (match_operand:XF 0 "register_operand" "=f") (neg:XF (float_extend:XF (match_operand:SF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF") - (set_attr "ppro_uops" "few")]) - -(define_insn "*negtf2_1" - [(set (match_operand:TF 0 "register_operand" "=f") - (neg:TF (match_operand:TF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF") - (set_attr "ppro_uops" "few")]) - -(define_insn "*negextenddftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (neg:TF (float_extend:TF - (match_operand:DF 1 "register_operand" "0"))))] - "TARGET_80387" - "fchs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF") - (set_attr "ppro_uops" "few")]) - -(define_insn "*negextendsftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (neg:TF (float_extend:TF - (match_operand:SF 1 "register_operand" "0"))))] "TARGET_80387" "fchs" [(set_attr "type" "fsgn") @@ -9723,14 +9985,18 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (SFmode); + rtx reg = gen_reg_rtx (V4SFmode); rtx dest = operands[0]; + rtx imm; operands[1] = force_reg (SFmode, operands[1]); operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - gen_int_mode (0x80000000, SImode))); + imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode)); + reg = force_reg (V4SFmode, + gen_rtx_CONST_VECTOR (V4SFmode, + gen_rtvec (4, imm, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), + CONST0_RTX (SFmode)))); emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9747,20 +10013,20 @@ "#") (define_insn "abssf2_ifs" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf") - (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x")) + [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") + (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0"))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,xm*r,xm*r")) (clobber (reg:CC 17))] "TARGET_SSE && (reload_in_progress || reload_completed || (register_operand (operands[0], VOIDmode) - && register_operand (operands[1], VOIDmode)))" + && register_operand (operands[1], VOIDmode)))" "#") (define_split [(set (match_operand:SF 0 "memory_operand" "") (abs:SF (match_operand:SF 1 "memory_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9770,7 +10036,7 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9780,12 +10046,23 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) + (use (match_operand:V4SF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI (subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) + (and:TI (match_dup 1) + (match_dup 2)))] +{ + operands[1] = simplify_gen_subreg (TImode, operands[1], SFmode, 0); + operands[2] = simplify_gen_subreg (TImode, operands[2], V4SFmode, 0); + if (operands_match_p (operands[0], operands[2])) + { + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems ;; because of secondary memory needed to reload from class FLOAT_INT_REGS @@ -9801,7 +10078,7 @@ [(set (match_operand:SF 0 "fp_register_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) (clobber (reg:CC 17))] - "TARGET_80387" + "TARGET_80387 && reload_completed" [(set (match_dup 0) (abs:SF (match_dup 1)))] "") @@ -9814,7 +10091,7 @@ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) (clobber (reg:CC 17))])] "operands[1] = gen_int_mode (~0x80000000, SImode); - operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));") + operands[0] = gen_lowpart (SImode, operands[0]);") (define_split [(set (match_operand 0 "memory_operand" "") @@ -9826,8 +10103,7 @@ { int size = GET_MODE_SIZE (GET_MODE (operands[1])); - /* XFmode's size is 12, TFmode 16, but only 10 bytes are used. */ - if (size >= 12) + if (GET_MODE (operands[1]) == XFmode) size = 10; operands[0] = adjust_address (operands[0], QImode, size - 1); operands[1] = gen_int_mode (~0x80, QImode); @@ -9848,17 +10124,22 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (DFmode); + rtx reg = gen_reg_rtx (V2DFmode); #if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); + rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode); #else - rtx imm = immed_double_const (0, 0x80000000, DImode); + rtx imm = immed_double_const (~0, ~0x80000000, DImode); #endif rtx dest = operands[0]; operands[1] = force_reg (DFmode, operands[1]); operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); + + /* Produce LONG_DOUBLE with the proper immediate argument. */ + imm = gen_lowpart (DFmode, imm); + reg = force_reg (V2DFmode, + gen_rtx_CONST_VECTOR (V2DFmode, + gen_rtvec (2, imm, CONST0_RTX (DFmode)))); emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9875,9 +10156,9 @@ "#") (define_insn "absdf2_ifs" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r,Ym*r")) (clobber (reg:CC 17))] "!TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9886,9 +10167,9 @@ "#") (define_insn "*absdf2_ifs_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Ym*r")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9899,7 +10180,7 @@ (define_split [(set (match_operand:DF 0 "memory_operand" "") (abs:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9909,7 +10190,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9919,12 +10200,27 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) + (use (match_operand:V2DF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI (subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) + (and:TI (match_dup 1) + (match_dup 2)))] +{ + operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + operands[1] = simplify_gen_subreg (TImode, operands[1], DFmode, 0); + operands[2] = simplify_gen_subreg (TImode, operands[2], V2DFmode, 0); + /* Avoid possible reformatting on the operands. */ + if (TARGET_SSE_PARTIAL_REGS && !optimize_size) + emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0])); + if (operands_match_p (operands[0], operands[2])) + { + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems @@ -9973,15 +10269,8 @@ [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") (neg:XF (match_operand:XF 1 "nonimmediate_operand" ""))) (clobber (reg:CC 17))])] - "!TARGET_64BIT && TARGET_80387" - "ix86_expand_unary_operator (ABS, XFmode, operands); DONE;") - -(define_expand "abstf2" - [(parallel [(set (match_operand:TF 0 "nonimmediate_operand" "") - (neg:TF (match_operand:TF 1 "nonimmediate_operand" ""))) - (clobber (reg:CC 17))])] "TARGET_80387" - "ix86_expand_unary_operator (ABS, TFmode, operands); DONE;") + "ix86_expand_unary_operator (ABS, XFmode, operands); DONE;") ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems ;; because of secondary memory needed to reload from class FLOAT_INT_REGS @@ -9990,7 +10279,7 @@ [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f") (abs:XF (match_operand:XF 1 "nonimmediate_operand" "0,0"))) (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 + "TARGET_80387 && ix86_unary_operator_ok (ABS, XFmode, operands)" "#") @@ -10014,33 +10303,6 @@ operands[0] = gen_rtx_REG (SImode, true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));") -(define_insn "*abstf2_if" - [(set (match_operand:TF 0 "nonimmediate_operand" "=f#r,rm#f") - (abs:TF (match_operand:TF 1 "nonimmediate_operand" "0,0"))) - (clobber (reg:CC 17))] - "TARGET_80387 && ix86_unary_operator_ok (ABS, TFmode, operands)" - "#") - -(define_split - [(set (match_operand:TF 0 "fp_register_operand" "") - (abs:TF (match_operand:TF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed" - [(set (match_dup 0) - (abs:TF (match_dup 1)))] - "") - -(define_split - [(set (match_operand:TF 0 "register_and_not_any_fp_reg_operand" "") - (abs:TF (match_operand:TF 1 "register_operand" ""))) - (clobber (reg:CC 17))] - "TARGET_80387 && reload_completed" - [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC 17))])] - "operands[1] = GEN_INT (~0x8000); - operands[0] = gen_rtx_REG (SImode, - true_regnum (operands[0]) + (TARGET_64BIT ? 1 : 2));") - (define_insn "*abssf2_1" [(set (match_operand:SF 0 "register_operand" "=f") (abs:SF (match_operand:SF 1 "register_operand" "0")))] @@ -10069,7 +10331,7 @@ (define_insn "*absxf2_1" [(set (match_operand:XF 0 "register_operand" "=f") (abs:XF (match_operand:XF 1 "register_operand" "0")))] - "!TARGET_64BIT && TARGET_80387 && reload_completed" + "TARGET_80387 && reload_completed" "fabs" [(set_attr "type" "fsgn") (set_attr "mode" "DF")]) @@ -10078,7 +10340,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (abs:XF (float_extend:XF (match_operand:DF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" + "TARGET_80387" "fabs" [(set_attr "type" "fsgn") (set_attr "mode" "XF")]) @@ -10087,32 +10349,6 @@ [(set (match_operand:XF 0 "register_operand" "=f") (abs:XF (float_extend:XF (match_operand:SF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) - -(define_insn "*abstf2_1" - [(set (match_operand:TF 0 "register_operand" "=f") - (abs:TF (match_operand:TF 1 "register_operand" "0")))] - "TARGET_80387 && reload_completed" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "DF")]) - -(define_insn "*absextenddftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (abs:TF (float_extend:TF - (match_operand:DF 1 "register_operand" "0"))))] - "TARGET_80387" - "fabs" - [(set_attr "type" "fsgn") - (set_attr "mode" "XF")]) - -(define_insn "*absextendsftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (abs:TF (float_extend:TF - (match_operand:SF 1 "register_operand" "0"))))] "TARGET_80387" "fabs" [(set_attr "type" "fsgn") @@ -10697,9 +10933,11 @@ (zero_extend:DI (ashift (match_operand 1 "register_operand" "") (match_operand:QI 2 "const_int_operand" "")))) (clobber (reg:CC 17))] - "reload_completed + "TARGET_64BIT && reload_completed && true_regnum (operands[0]) != true_regnum (operands[1])" - [(set (match_dup 0) (zero_extend:DI (subreg:SI (mult:SI (match_dup 1) (match_dup 2)) 0)))] + [(set (match_dup 0) (zero_extend:DI + (subreg:SI (mult:SI (match_dup 1) + (match_dup 2)) 0)))] { operands[1] = gen_lowpart (Pmode, operands[1]); operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); @@ -11112,7 +11350,7 @@ (define_insn "*ashrdi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11142,7 +11380,7 @@ [(set (reg 17) (compare (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ashiftrt:DI (match_dup 1) (match_dup 2)))] @@ -11301,7 +11539,7 @@ (define_insn "*ashrsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, SImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11315,7 +11553,7 @@ (define_insn "*ashrsi3_1_one_bit_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")))) + (match_operand:QI 2 "const1_operand" "")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11354,7 +11592,7 @@ [(set (reg 17) (compare (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (ashiftrt:SI (match_dup 1) (match_dup 2)))] @@ -11372,7 +11610,7 @@ [(set (reg 17) (compare (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] @@ -11425,7 +11663,7 @@ (define_insn "*ashrhi3_1_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, HImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11455,7 +11693,7 @@ [(set (reg 17) (compare (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashiftrt:HI (match_dup 1) (match_dup 2)))] @@ -11497,7 +11735,7 @@ (define_insn "*ashrqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11511,7 +11749,7 @@ (define_insn "*ashrqi3_1_one_bit_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) (ashiftrt:QI (match_dup 0) - (match_operand:QI 1 "const_int_1_operand" ""))) + (match_operand:QI 1 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ASHIFTRT, QImode, operands) && (! TARGET_PARTIAL_REG_STALL || optimize_size) @@ -11555,7 +11793,7 @@ [(set (reg 17) (compare (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "I")) + (match_operand:QI 2 "const1_operand" "I")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (ashiftrt:QI (match_dup 1) (match_dup 2)))] @@ -11609,7 +11847,7 @@ (define_insn "*lshrdi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11639,7 +11877,7 @@ [(set (reg 17) (compare (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:DI 0 "nonimmediate_operand" "=rm") (lshiftrt:DI (match_dup 1) (match_dup 2)))] @@ -11719,7 +11957,7 @@ (define_insn "*lshrsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11733,7 +11971,7 @@ (define_insn "*lshrsi3_1_one_bit_zext" [(set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0")) - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11773,7 +12011,7 @@ [(set (reg 17) (compare (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm") (lshiftrt:SI (match_dup 1) (match_dup 2)))] @@ -11791,7 +12029,7 @@ [(set (reg 17) (compare (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] @@ -11844,7 +12082,7 @@ (define_insn "*lshrhi3_1_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (LSHIFTRT, HImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11874,7 +12112,7 @@ [(set (reg 17) (compare (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm") (lshiftrt:HI (match_dup 1) (match_dup 2)))] @@ -11916,7 +12154,7 @@ (define_insn "*lshrqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (LSHIFTRT, QImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -11930,7 +12168,7 @@ (define_insn "*lshrqi3_1_one_bit_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) (lshiftrt:QI (match_dup 0) - (match_operand:QI 1 "const_int_1_operand" ""))) + (match_operand:QI 1 "const1_operand" ""))) (clobber (reg:CC 17))] "(! TARGET_PARTIAL_REG_STALL || optimize_size) && (TARGET_SHIFT1 || optimize_size)" @@ -11973,7 +12211,7 @@ [(set (reg 17) (compare (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")) + (match_operand:QI 2 "const1_operand" "")) (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=qm") (lshiftrt:QI (match_dup 1) (match_dup 2)))] @@ -12017,7 +12255,7 @@ (define_insn "*rotlsi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12051,7 +12289,7 @@ (define_insn "*rotlsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATE, SImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12066,7 +12304,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (rotate:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")))) + (match_operand:QI 2 "const1_operand" "")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12110,7 +12348,7 @@ (define_insn "*rotlhi3_1_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATE, HImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12144,7 +12382,7 @@ (define_insn "*rotlqi3_1_one_bit_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) (rotate:QI (match_dup 0) - (match_operand:QI 1 "const_int_1_operand" ""))) + (match_operand:QI 1 "const1_operand" ""))) (clobber (reg:CC 17))] "(! TARGET_PARTIAL_REG_STALL || optimize_size) && (TARGET_SHIFT1 || optimize_size)" @@ -12158,7 +12396,7 @@ (define_insn "*rotlqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATE, QImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12205,7 +12443,7 @@ (define_insn "*rotrdi3_1_one_bit_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12239,7 +12477,7 @@ (define_insn "*rotrsi3_1_one_bit" [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATERT, SImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12254,7 +12492,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (rotatert:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "const_int_1_operand" "")))) + (match_operand:QI 2 "const1_operand" "")))) (clobber (reg:CC 17))] "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12301,7 +12539,7 @@ (define_insn "*rotrhi3_one_bit" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATERT, HImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12335,7 +12573,7 @@ (define_insn "*rotrqi3_1_one_bit" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "const_int_1_operand" ""))) + (match_operand:QI 2 "const1_operand" ""))) (clobber (reg:CC 17))] "ix86_binary_operator_ok (ROTATERT, QImode, operands) && (TARGET_SHIFT1 || optimize_size)" @@ -12349,7 +12587,7 @@ (define_insn "*rotrqi3_1_one_bit_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) (rotatert:QI (match_dup 0) - (match_operand:QI 1 "const_int_1_operand" ""))) + (match_operand:QI 1 "const1_operand" ""))) (clobber (reg:CC 17))] "(! TARGET_PARTIAL_REG_STALL || optimize_size) && (TARGET_SHIFT1 || optimize_size)" @@ -12422,10 +12660,10 @@ }) (define_expand "insv" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "") - (match_operand:SI 1 "immediate_operand" "") - (match_operand:SI 2 "immediate_operand" "")) - (match_operand:SI 3 "register_operand" ""))] + [(set (zero_extract (match_operand 0 "ext_register_operand" "") + (match_operand 1 "immediate_operand" "") + (match_operand 2 "immediate_operand" "")) + (match_operand 3 "register_operand" ""))] "" { /* Handle extractions from %ah et al. */ @@ -12436,6 +12674,13 @@ matches the predicate, so check it again here. */ if (! register_operand (operands[0], VOIDmode)) FAIL; + + if (TARGET_64BIT) + emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3])); + else + emit_insn (gen_movsi_insv_1 (operands[0], operands[3])); + + DONE; }) ;; %%% bts, btr, btc, bt. @@ -12648,7 +12893,7 @@ ;; The SSE store flag instructions saves 0 or 0xffffffff to the result. ;; subsequent logical operations are used to imitate conditional moves. ;; 0xffffffff is NaN, but not in normalized form, so we can't represent -;; it directly. Futher holding this value in pseudo register might bring +;; it directly. Further holding this value in pseudo register might bring ;; problem in implicit normalization in spill code. ;; So we don't define FLOAT_STORE_FLAG_VALUE and create these ;; instructions after reload by splitting the conditional move patterns. @@ -13321,7 +13566,7 @@ (match_dup 2))] { operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17); - operands[5] = gen_rtx_REG (QImode, REGNO (operands[3])); + operands[5] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) @@ -13343,7 +13588,7 @@ (match_dup 2))] { operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17); - operands[5] = gen_rtx_REG (QImode, REGNO (operands[3])); + operands[5] = gen_lowpart (QImode, operands[3]); ix86_expand_clear (operands[3]); }) @@ -13363,7 +13608,7 @@ (match_operand:SI 3 "" "")))])] "!TARGET_64BIT" { - ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3]); + ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 0); DONE; }) @@ -13408,7 +13653,17 @@ (use (match_operand 2 "" ""))] "" { - ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL); + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0); + DONE; +}) + +(define_expand "sibcall" + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1); DONE; }) @@ -13427,41 +13682,51 @@ (define_insn "*call_1" [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm")) (match_operand 1 "" ""))] - "!TARGET_64BIT" + "!SIBLING_CALL_P (insn) && !TARGET_64BIT" { if (constant_call_address_operand (operands[0], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P0"; - else - return "call\t%P0"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A0"; - else - return "call\t%A0"; + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,c,d,a")) + (match_operand 1 "" ""))] + "SIBLING_CALL_P (insn) && !TARGET_64BIT" +{ + if (constant_call_address_operand (operands[0], QImode)) + return "jmp\t%P0"; + return "jmp\t%A0"; } [(set_attr "type" "call")]) (define_insn "*call_1_rex64" [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) (match_operand 1 "" ""))] - "TARGET_64BIT" + "!SIBLING_CALL_P (insn) && TARGET_64BIT" { if (constant_call_address_operand (operands[0], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P0"; - else - return "call\t%P0"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A0"; - else - return "call\t%A0"; + return "call\t%P0"; + return "call\t%A0"; } [(set_attr "type" "call")]) +(define_insn "*sibcall_1_rex64" + [(call (mem:QI (match_operand:DI 0 "constant_call_address_operand" "")) + (match_operand 1 "" ""))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t%P0" + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1_rex64_v" + [(call (mem:QI (reg:DI 40)) + (match_operand 0 "" ""))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t*%%r11" + [(set_attr "type" "call")]) + + ;; Call subroutine, returning value in operand 0 (define_expand "call_value_pop" @@ -13474,7 +13739,7 @@ "!TARGET_64BIT" { ix86_expand_call (operands[0], operands[1], operands[2], - operands[3], operands[4]); + operands[3], operands[4], 0); DONE; }) @@ -13486,7 +13751,19 @@ ;; Operand 2 not used on the i386. "" { - ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL); + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 0); + DONE; +}) + +(define_expand "sibcall_value" + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] + ;; Operand 2 not used on the i386. + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 1); DONE; }) @@ -13509,7 +13786,7 @@ ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387 ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), operands[0], const0_rtx, GEN_INT (SSE_REGPARM_MAX - 1), - NULL); + NULL, 0); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -13561,6 +13838,19 @@ (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) +;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET +;; instruction Athlon and K8 have. + +(define_insn "return_internal_long" + [(return) + (unspec [(const_int 0)] UNSPEC_REP)] + "reload_completed" + "rep {;} ret" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "prefix_rep" "1") + (set_attr "modrm" "0")]) + (define_insn "return_pop_internal" [(return) (use (match_operand:SI 0 "const_int_operand" ""))] @@ -13587,6 +13877,26 @@ (set_attr "modrm" "0") (set_attr "ppro_uops" "one")]) +;; Align to 16-byte boundary, max skip in op0. Used to avoid +;; branch prediction penalty for the third jump in a 16-byte +;; block on K8. + +(define_insn "align" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)] + "" +{ +#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN + ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0])); +#else + /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that. + The align insn is used to avoid 3 jump instructions in the row to improve + branch prediction and the benefits hardly outweight the cost of extra 8 + nops on the average inserted by full alignment pseudo operation. */ +#endif + return ""; +} + [(set_attr "length" "16")]) + (define_expand "prologue" [(const_int 1)] "" @@ -13657,11 +13967,7 @@ (clobber (mem:BLK (scratch)))] "!TARGET_64BIT" "leave" - [(set_attr "length_immediate" "0") - (set_attr "length" "1") - (set_attr "modrm" "0") - (set_attr "athlon_decode" "vector") - (set_attr "ppro_uops" "few")]) + [(set_attr "type" "leave")]) (define_insn "leave_rex64" [(set (reg:DI 7) (plus:DI (reg:DI 6) (const_int 8))) @@ -13669,111 +13975,100 @@ (clobber (mem:BLK (scratch)))] "TARGET_64BIT" "leave" - [(set_attr "length_immediate" "0") - (set_attr "length" "1") - (set_attr "modrm" "0") - (set_attr "athlon_decode" "vector") - (set_attr "ppro_uops" "few")]) + [(set_attr "type" "leave")]) (define_expand "ffssi2" - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_scratch:SI 2 "")) + (clobber (reg:CC 17))])] "" -{ - rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode); - rtx in = operands[1]; + "") - if (TARGET_CMOVE) - { - emit_move_insn (tmp, constm1_rtx); - emit_insn (gen_ffssi_1 (out, in)); - emit_insn (gen_rtx_SET (VOIDmode, out, - gen_rtx_IF_THEN_ELSE (SImode, - gen_rtx_EQ (VOIDmode, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx), - tmp, - out))); - emit_insn (gen_addsi3 (out, out, const1_rtx)); - emit_move_insn (operands[0], out); - } +(define_insn_and_split "*ffs_cmove" + [(set (match_operand:SI 0 "register_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&r")) + (clobber (reg:CC 17))] + "TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (match_dup 0) (if_then_else:SI + (eq (reg:CCZ 17) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC 17))])] + "") - /* Pentium bsf instruction is extremly slow. The following code is - recommended by the Intel Optimizing Manual as a reasonable replacement: - TEST EAX,EAX - JZ SHORT BS2 - XOR ECX,ECX - MOV DWORD PTR [TEMP+4],ECX - SUB ECX,EAX - AND EAX,ECX - MOV DWORD PTR [TEMP],EAX - FILD QWORD PTR [TEMP] - FSTP QWORD PTR [TEMP] - WAIT ; WAIT only needed for compatibility with - ; earlier processors - MOV ECX, DWORD PTR [TEMP+4] - SHR ECX,20 - SUB ECX,3FFH - TEST EAX,EAX ; clear zero flag - BS2: - Following piece of code expand ffs to similar beast. - */ - - else if (TARGET_PENTIUM && !optimize_size && TARGET_80387) - { - rtx label = gen_label_rtx (); - rtx lo, hi; - rtx mem = assign_386_stack_local (DImode, 0); - rtx fptmp = gen_reg_rtx (DFmode); - split_di (&mem, 1, &lo, &hi); - - emit_move_insn (out, const0_rtx); - - emit_cmp_and_jump_insns (in, const0_rtx, EQ, 0, SImode, 1, label); - - emit_move_insn (hi, out); - emit_insn (gen_subsi3 (out, out, in)); - emit_insn (gen_andsi3 (out, out, in)); - emit_move_insn (lo, out); - emit_insn (gen_floatdidf2 (fptmp,mem)); - emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp); - emit_move_insn (out, hi); - emit_insn (gen_lshrsi3 (out, out, GEN_INT (20))); - emit_insn (gen_subsi3 (out, out, GEN_INT (0x3ff - 1))); - - emit_label (label); - LABEL_NUSES (label) = 1; - - emit_move_insn (operands[0], out); - } - else - { - emit_move_insn (tmp, const0_rtx); - emit_insn (gen_ffssi_1 (out, in)); - emit_insn (gen_rtx_SET (VOIDmode, - gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (QImode, tmp)), - gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx))); - emit_insn (gen_negsi2 (tmp, tmp)); - emit_insn (gen_iorsi3 (out, out, tmp)); - emit_insn (gen_addsi3 (out, out, const1_rtx)); - emit_move_insn (operands[0], out); - } - DONE; +(define_insn_and_split "*ffs_no_cmove" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&q")) + (clobber (reg:CC 17))] + "" + "#" + "reload_completed" + [(parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (strict_low_part (match_dup 3)) + (eq:QI (reg:CCZ 17) (const_int 0))) + (parallel [(set (match_dup 2) (neg:SI (match_dup 2))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC 17))]) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC 17))])] +{ + operands[3] = gen_lowpart (QImode, operands[2]); + ix86_expand_clear (operands[2]); }) -(define_insn "ffssi_1" +(define_insn "*ffssi_1" [(set (reg:CCZ 17) - (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") + (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") (const_int 0))) (set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_dup 1)] UNSPEC_BSF))] + (ctz:SI (match_dup 1)))] + "" + "bsf{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "ppro_uops" "few")]) + +(define_insn "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC 17))] "" "bsf{l}\t{%1, %0|%0, %1}" [(set_attr "prefix_0f" "1") (set_attr "ppro_uops" "few")]) -;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger -;; and slower than the two-byte movzx insn needed to do the work in SImode. +(define_expand "clzsi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC 17))]) + (parallel + [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31))) + (clobber (reg:CC 17))])] + "" + "") + +(define_insn "*bsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC 17))] + "" + "bsr{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "ppro_uops" "few")]) ;; Thread-local storage patterns for ELF. ;; @@ -13941,6 +14236,56 @@ (clobber (match_dup 5)) (clobber (reg:CC 17))])] "") + +;; Load and add the thread base pointer from %gs:0. + +(define_insn "*load_tp_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TP))] + "!TARGET_64BIT" + "mov{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP) + (match_operand:SI 1 "register_operand" "0"))) + (clobber (reg:CC 17))] + "!TARGET_64BIT" + "add{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*load_tp_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_TP))] + "TARGET_64BIT" + "mov{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP) + (match_operand:DI 1 "register_operand" "0"))) + (clobber (reg:CC 17))] + "TARGET_64BIT" + "add{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) ;; These patterns match the binary 387 instructions for addM3, subM3, ;; mulM3 and divM3. There are three patterns for each of DFmode and @@ -14054,7 +14399,7 @@ (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "%0") (match_operand:XF 2 "register_operand" "f")]))] - "!TARGET_64BIT && TARGET_80387 + "TARGET_80387 && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -14063,19 +14408,6 @@ (const_string "fop"))) (set_attr "mode" "XF")]) -(define_insn "*fop_tf_comm" - [(set (match_operand:TF 0 "register_operand" "=f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "%0") - (match_operand:TF 2 "register_operand" "f")]))] - "TARGET_80387 && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (if_then_else (match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (const_string "fop"))) - (set_attr "mode" "XF")]) - (define_insn "*fop_sf_1_nosse" [(set (match_operand:SF 0 "register_operand" "=f,f") (match_operator:SF 3 "binary_fp_operator" @@ -14307,35 +14639,36 @@ (const_string "fop"))) (set_attr "mode" "SF")]) -(define_insn "*fop_xf_1" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0,f") - (match_operand:XF 2 "register_operand" "f,0")]))] - "!TARGET_64BIT && TARGET_80387 - && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" +(define_insn "*fop_df_6" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF + (match_operand:SF 1 "register_operand" "0,f")) + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") + (cond [(match_operand:DF 3 "mult_operator" "") (const_string "fmul") - (match_operand:XF 3 "div_operator" "") + (match_operand:DF 3 "div_operator" "") (const_string "fdiv") ] (const_string "fop"))) - (set_attr "mode" "XF")]) + (set_attr "mode" "SF")]) -(define_insn "*fop_tf_1" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "0,f") - (match_operand:TF 2 "register_operand" "f,0")]))] +(define_insn "*fop_xf_1" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (match_operand:XF 2 "register_operand" "f,0")]))] "TARGET_80387 && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" "* return output_387_binary_op (insn, operands);" [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") + (cond [(match_operand:XF 3 "mult_operator" "") (const_string "fmul") - (match_operand:TF 3 "div_operator" "") + (match_operand:XF 3 "div_operator" "") (const_string "fdiv") ] (const_string "fop"))) @@ -14346,7 +14679,7 @@ (match_operator:XF 3 "binary_fp_operator" [(float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) (match_operand:XF 2 "register_operand" "0,0")]))] - "!TARGET_64BIT && TARGET_80387 && TARGET_USE_FIOP" + "TARGET_80387 && TARGET_USE_FIOP" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14359,30 +14692,12 @@ (set_attr "mode" "SI") (set_attr "ppro_uops" "many")]) -(define_insn "*fop_tf_2" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(float:TF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) - (match_operand:TF 2 "register_operand" "0,0")]))] - "TARGET_80387 && TARGET_USE_FIOP" - "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "mode" "SI") - (set_attr "ppro_uops" "many")]) - (define_insn "*fop_xf_3" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,0") (float:XF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "!TARGET_64BIT && TARGET_80387 && TARGET_USE_FIOP" + "TARGET_80387 && TARGET_USE_FIOP" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14395,30 +14710,12 @@ (set_attr "mode" "SI") (set_attr "ppro_uops" "many")]) -(define_insn "*fop_tf_3" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "0,0") - (float:TF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "TARGET_80387 && TARGET_USE_FIOP" - "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "fp_int_src" "true") - (set_attr "mode" "SI") - (set_attr "ppro_uops" "many")]) - (define_insn "*fop_xf_4" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" - [(float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) + [(float_extend:XF (match_operand 1 "nonimmediate_operand" "fm,0")) (match_operand:XF 2 "register_operand" "0,f")]))] - "!TARGET_64BIT && TARGET_80387" + "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14429,29 +14726,13 @@ (const_string "fop"))) (set_attr "mode" "SF")]) -(define_insn "*fop_tf_4" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(float_extend:TF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) - (match_operand:TF 2 "register_operand" "0,f")]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) - (define_insn "*fop_xf_5" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,f") (float_extend:XF - (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] - "!TARGET_64BIT && TARGET_80387" + (match_operand 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14462,62 +14743,14 @@ (const_string "fop"))) (set_attr "mode" "SF")]) -(define_insn "*fop_tf_5" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "0,f") - (float_extend:TF - (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "SF")]) - (define_insn "*fop_xf_6" [(set (match_operand:XF 0 "register_operand" "=f,f") (match_operator:XF 3 "binary_fp_operator" - [(float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,0")) - (match_operand:XF 2 "register_operand" "0,f")]))] - "!TARGET_64BIT && TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:XF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:XF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) - -(define_insn "*fop_tf_6" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(float_extend:TF (match_operand:DF 1 "nonimmediate_operand" "fm,0")) - (match_operand:TF 2 "register_operand" "0,f")]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) - -(define_insn "*fop_xf_7" - [(set (match_operand:XF 0 "register_operand" "=f,f") - (match_operator:XF 3 "binary_fp_operator" - [(match_operand:XF 1 "register_operand" "0,f") + [(float_extend:XF + (match_operand 1 "register_operand" "0,f")) (float_extend:XF - (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))] - "!TARGET_64BIT && TARGET_80387" + (match_operand 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14526,24 +14759,7 @@ (const_string "fdiv") ] (const_string "fop"))) - (set_attr "mode" "DF")]) - -(define_insn "*fop_tf_7" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (match_operator:TF 3 "binary_fp_operator" - [(match_operand:TF 1 "register_operand" "0,f") - (float_extend:TF - (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))] - "TARGET_80387" - "* return output_387_binary_op (insn, operands);" - [(set (attr "type") - (cond [(match_operand:TF 3 "mult_operator" "") - (const_string "fmul") - (match_operand:TF 3 "div_operator" "") - (const_string "fdiv") - ] - (const_string "fop"))) - (set_attr "mode" "DF")]) + (set_attr "mode" "SF")]) (define_split [(set (match_operand 0 "register_operand" "") @@ -14682,17 +14898,7 @@ (define_insn "sqrtxf2" [(set (match_operand:XF 0 "register_operand" "=f") (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 - && (TARGET_IEEE_FP || flag_unsafe_math_optimizations) " - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) - -(define_insn "sqrttf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (sqrt:TF (match_operand:TF 1 "register_operand" "0")))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + "TARGET_80387 && !TARGET_NO_FANCY_MATH_387 && (TARGET_IEEE_FP || flag_unsafe_math_optimizations) " "fsqrt" [(set_attr "type" "fpspc") @@ -14703,17 +14909,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (sqrt:XF (float_extend:XF (match_operand:DF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) - -(define_insn "*sqrtextenddftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (sqrt:TF (float_extend:TF - (match_operand:DF 1 "register_operand" "0"))))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387" + "TARGET_80387 && !TARGET_NO_FANCY_MATH_387" "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") @@ -14723,17 +14919,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (sqrt:XF (float_extend:XF (match_operand:SF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" - "fsqrt" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) - -(define_insn "*sqrtextendsftf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (sqrt:TF (float_extend:TF - (match_operand:SF 1 "register_operand" "0"))))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387" + "TARGET_80387 && !TARGET_NO_FANCY_MATH_387" "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") @@ -14771,16 +14957,7 @@ (define_insn "sinxf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 - && flag_unsafe_math_optimizations" - "fsin" - [(set_attr "type" "fpspc") - (set_attr "mode" "XF")]) - -(define_insn "sintf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_SIN))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + "TARGET_80387 && !TARGET_NO_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fsin" [(set_attr "type" "fpspc") @@ -14818,20 +14995,333 @@ (define_insn "cosxf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))] - "!TARGET_64BIT && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" [(set_attr "type" "fpspc") (set_attr "mode" "XF")]) -(define_insn "costf2" - [(set (match_operand:TF 0 "register_operand" "=f") - (unspec:TF [(match_operand:TF 1 "register_operand" "0")] UNSPEC_COS))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 +(define_insn "atan2df3_1" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 2 "register_operand" "0") + (match_operand:DF 1 "register_operand" "u")] + UNSPEC_FPATAN)) + (clobber (match_scratch:DF 3 "=1"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" - "fcos" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_expand "atan2df3" + [(use (match_operand:DF 0 "register_operand" "=f")) + (use (match_operand:DF 2 "register_operand" "0")) + (use (match_operand:DF 1 "register_operand" "u"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + rtx copy = gen_reg_rtx (DFmode); + emit_move_insn (copy, operands[1]); + emit_insn (gen_atan2df3_1 (operands[0], copy, operands[2])); + DONE; +}) + +(define_insn "atan2sf3_1" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 2 "register_operand" "0") + (match_operand:SF 1 "register_operand" "u")] + UNSPEC_FPATAN)) + (clobber (match_scratch:SF 3 "=1"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF")]) + +(define_expand "atan2sf3" + [(use (match_operand:SF 0 "register_operand" "=f")) + (use (match_operand:SF 2 "register_operand" "0")) + (use (match_operand:SF 1 "register_operand" "u"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + rtx copy = gen_reg_rtx (SFmode); + emit_move_insn (copy, operands[1]); + emit_insn (gen_atan2sf3_1 (operands[0], copy, operands[2])); + DONE; +}) + +(define_insn "atan2xf3_1" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 "=1"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "atan2xf3" + [(use (match_operand:XF 0 "register_operand" "=f")) + (use (match_operand:XF 2 "register_operand" "0")) + (use (match_operand:XF 1 "register_operand" "u"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + rtx copy = gen_reg_rtx (XFmode); + emit_move_insn (copy, operands[1]); + emit_insn (gen_atan2xf3_1 (operands[0], copy, operands[2])); + DONE; +}) + +(define_insn "*fyl2x_sfxf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:SF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FYL2X)) + (clobber (match_scratch:SF 3 "=1"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fyl2x" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF")]) + +(define_insn "*fyl2x_dfxf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FYL2X)) + (clobber (match_scratch:DF 3 "=1"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fyl2x" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_insn "*fyl2x_xf3" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 "=1"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fyl2x" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "logsf2" + [(parallel [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_operand:SF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:SF 3 ""))])] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + rtx temp; + + operands[2] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (4); /* fldln2 */ + emit_move_insn (operands[2], temp); +}) + +(define_expand "logdf2" + [(parallel [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(match_operand:DF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:DF 3 ""))])] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + rtx temp; + + operands[2] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (4); /* fldln2 */ + emit_move_insn (operands[2], temp); +}) + +(define_expand "logxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "") + (match_dup 2)] UNSPEC_FYL2X)) + (clobber (match_scratch:XF 3 ""))])] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + rtx temp; + + operands[2] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (4); /* fldln2 */ + emit_move_insn (operands[2], temp); +}) + +(define_insn "*fscale_sfxf3" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FSCALE)) + (clobber (match_scratch:SF 3 "=1"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fscale\;fstp\t%y1" + [(set_attr "type" "fpspc") + (set_attr "mode" "SF")]) + +(define_insn "*fscale_dfxf3" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FSCALE)) + (clobber (match_scratch:DF 3 "=1"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fscale\;fstp\t%y1" + [(set_attr "type" "fpspc") + (set_attr "mode" "DF")]) + +(define_insn "*fscale_xf3" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 1 "register_operand" "u")] + UNSPEC_FSCALE)) + (clobber (match_scratch:XF 3 "=1"))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "fscale\;fstp\t%y1" [(set_attr "type" "fpspc") (set_attr "mode" "XF")]) + +(define_insn "*frndintxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "frndint" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "*f2xm1xf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_F2XM1))] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" + "f2xm1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "expsf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:SF 1 "register_operand" ""))) + (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3))) + (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT)) + (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5))) + (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1)) + (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8))) + (parallel [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_dup 9) (match_dup 5)] UNSPEC_FSCALE)) + (clobber (match_scratch:SF 5 ""))])] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + rtx temp; + int i; + + for (i=2; i<10; i++) + operands[i] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (5); /* fldl2e */ + emit_move_insn (operands[3], temp); + emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "expdf2" + [(set (match_dup 2) + (float_extend:XF (match_operand:DF 1 "register_operand" ""))) + (set (match_dup 4) (mult:XF (match_dup 2) (match_dup 3))) + (set (match_dup 5) (unspec:XF [(match_dup 4)] UNSPEC_FRNDINT)) + (set (match_dup 6) (minus:XF (match_dup 4) (match_dup 5))) + (set (match_dup 7) (unspec:XF [(match_dup 6)] UNSPEC_F2XM1)) + (set (match_dup 9) (plus:XF (match_dup 7) (match_dup 8))) + (parallel [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(match_dup 9) (match_dup 5)] UNSPEC_FSCALE)) + (clobber (match_scratch:DF 5 ""))])] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + rtx temp; + int i; + + for (i=2; i<10; i++) + operands[i] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (5); /* fldl2e */ + emit_move_insn (operands[3], temp); + emit_move_insn (operands[8], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "expxf2" + [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "") + (match_dup 2))) + (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) + (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) + (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) + (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7))) + (parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 8) (match_dup 4)] UNSPEC_FSCALE)) + (clobber (match_scratch:XF 5 ""))])] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + rtx temp; + int i; + + for (i=2; i<9; i++) + operands[i] = gen_reg_rtx (XFmode); + temp = standard_80387_constant_rtx (5); /* fldl2e */ + emit_move_insn (operands[2], temp); + emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "atansf2" + [(parallel [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_dup 2) + (match_operand:SF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:SF 3 ""))])] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (SFmode); + emit_move_insn (operands[2], CONST1_RTX (SFmode)); /* fld1 */ +}) + +(define_expand "atandf2" + [(parallel [(set (match_operand:DF 0 "register_operand" "") + (unspec:DF [(match_dup 2) + (match_operand:DF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:DF 3 ""))])] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (DFmode); + emit_move_insn (operands[2], CONST1_RTX (DFmode)); /* fld1 */ +}) + +(define_expand "atanxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 2) + (match_operand:XF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 ""))])] + "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + && flag_unsafe_math_optimizations" +{ + operands[2] = gen_reg_rtx (XFmode); + emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */ +}) ;; Block operation instructions @@ -14846,7 +15336,7 @@ (use (match_operand:BLK 1 "memory_operand" "")) (use (match_operand:SI 2 "nonmemory_operand" "")) (use (match_operand:SI 3 "const_int_operand" ""))] - "" + "! optimize_size" { if (ix86_expand_movstr (operands[0], operands[1], operands[2], operands[3])) DONE; @@ -14870,170 +15360,45 @@ ;; Most CPUs don't like single string operations ;; Handle this case here to simplify previous expander. -(define_expand "strmovdi_rex64" - [(set (match_dup 2) - (mem:DI (match_operand:DI 1 "register_operand" ""))) - (set (mem:DI (match_operand:DI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8))) +(define_expand "strmov" + [(set (match_dup 4) (match_operand 3 "memory_operand" "")) + (set (match_operand 1 "memory_operand" "") (match_dup 4)) + (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5)) (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 8))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovdi_rex_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (DImode); -}) - - -(define_expand "strmovsi" - [(set (match_dup 2) - (mem:SI (match_operand:SI 1 "register_operand" ""))) - (set (mem:SI (match_operand:SI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 4))) + (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6)) (clobber (reg:CC 17))])] "" { - if (TARGET_64BIT) - { - emit_insn (gen_strmovsi_rex64 (operands[0], operands[1])); - DONE; - } - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovsi_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (SImode); -}) + rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1]))); -(define_expand "strmovsi_rex64" - [(set (match_dup 2) - (mem:SI (match_operand:DI 1 "register_operand" ""))) - (set (mem:SI (match_operand:DI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 4))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovsi_rex_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (SImode); -}) + /* If .md ever supports :P for Pmode, these can be directly + in the pattern above. */ + operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust); + operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust); -(define_expand "strmovhi" - [(set (match_dup 2) - (mem:HI (match_operand:SI 1 "register_operand" ""))) - (set (mem:HI (match_operand:SI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 2))) - (clobber (reg:CC 17))])] - "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_strmovhi_rex64 (operands[0], operands[1])); - DONE; - } if (TARGET_SINGLE_STRINGOP || optimize_size) { - emit_insn (gen_strmovhi_1 (operands[0], operands[1], operands[0], - operands[1])); + emit_insn (gen_strmov_singleop (operands[0], operands[1], + operands[2], operands[3], + operands[5], operands[6])); DONE; } - else - operands[2] = gen_reg_rtx (HImode); -}) -(define_expand "strmovhi_rex64" - [(set (match_dup 2) - (mem:HI (match_operand:DI 1 "register_operand" ""))) - (set (mem:HI (match_operand:DI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 2))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovhi_rex_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (HImode); + operands[4] = gen_reg_rtx (GET_MODE (operands[1])); }) -(define_expand "strmovqi" - [(set (match_dup 2) - (mem:QI (match_operand:SI 1 "register_operand" ""))) - (set (mem:QI (match_operand:SI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 1))) - (clobber (reg:CC 17))])] - "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_strmovqi_rex64 (operands[0], operands[1])); - DONE; - } - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovqi_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (QImode); -}) - -(define_expand "strmovqi_rex64" - [(set (match_dup 2) - (mem:QI (match_operand:DI 1 "register_operand" ""))) - (set (mem:QI (match_operand:DI 0 "register_operand" "")) - (match_dup 2)) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) - (clobber (reg:CC 17))]) - (parallel [(set (match_dup 1) (plus:DI (match_dup 1) (const_int 1))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strmovqi_rex_1 (operands[0], operands[1], operands[0], - operands[1])); - DONE; - } - else - operands[2] = gen_reg_rtx (QImode); -}) +(define_expand "strmov_singleop" + [(parallel [(set (match_operand 1 "memory_operand" "") + (match_operand 3 "memory_operand" "")) + (set (match_operand 0 "register_operand" "") + (match_operand 4 "" "")) + (set (match_operand 2 "register_operand" "") + (match_operand 5 "" "")) + (use (reg:SI 19))])] + "TARGET_SINGLE_STRINGOP || optimize_size" + "") -(define_insn "strmovdi_rex_1" +(define_insn "*strmovdi_rex_1" [(set (mem:DI (match_operand:DI 2 "register_operand" "0")) (mem:DI (match_operand:DI 3 "register_operand" "1"))) (set (match_operand:DI 0 "register_operand" "=D") @@ -15049,7 +15414,7 @@ (set_attr "mode" "DI") (set_attr "memory" "both")]) -(define_insn "strmovsi_1" +(define_insn "*strmovsi_1" [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) (mem:SI (match_operand:SI 3 "register_operand" "1"))) (set (match_operand:SI 0 "register_operand" "=D") @@ -15065,7 +15430,7 @@ (set_attr "mode" "SI") (set_attr "memory" "both")]) -(define_insn "strmovsi_rex_1" +(define_insn "*strmovsi_rex_1" [(set (mem:SI (match_operand:DI 2 "register_operand" "0")) (mem:SI (match_operand:DI 3 "register_operand" "1"))) (set (match_operand:DI 0 "register_operand" "=D") @@ -15081,7 +15446,7 @@ (set_attr "mode" "SI") (set_attr "memory" "both")]) -(define_insn "strmovhi_1" +(define_insn "*strmovhi_1" [(set (mem:HI (match_operand:SI 2 "register_operand" "0")) (mem:HI (match_operand:SI 3 "register_operand" "1"))) (set (match_operand:SI 0 "register_operand" "=D") @@ -15097,7 +15462,7 @@ (set_attr "memory" "both") (set_attr "mode" "HI")]) -(define_insn "strmovhi_rex_1" +(define_insn "*strmovhi_rex_1" [(set (mem:HI (match_operand:DI 2 "register_operand" "0")) (mem:HI (match_operand:DI 3 "register_operand" "1"))) (set (match_operand:DI 0 "register_operand" "=D") @@ -15113,7 +15478,7 @@ (set_attr "memory" "both") (set_attr "mode" "HI")]) -(define_insn "strmovqi_1" +(define_insn "*strmovqi_1" [(set (mem:QI (match_operand:SI 2 "register_operand" "0")) (mem:QI (match_operand:SI 3 "register_operand" "1"))) (set (match_operand:SI 0 "register_operand" "=D") @@ -15129,7 +15494,7 @@ (set_attr "memory" "both") (set_attr "mode" "QI")]) -(define_insn "strmovqi_rex_1" +(define_insn "*strmovqi_rex_1" [(set (mem:QI (match_operand:DI 2 "register_operand" "0")) (mem:QI (match_operand:DI 3 "register_operand" "1"))) (set (match_operand:DI 0 "register_operand" "=D") @@ -15145,7 +15510,20 @@ (set_attr "memory" "both") (set_attr "mode" "QI")]) -(define_insn "rep_movdi_rex64" +(define_expand "rep_mov" + [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0)) + (set (match_operand 0 "register_operand" "") + (match_operand 5 "" "")) + (set (match_operand 2 "register_operand" "") + (match_operand 6 "" "")) + (set (match_operand 1 "memory_operand" "") + (match_operand 3 "memory_operand" "")) + (use (match_dup 4)) + (use (reg:SI 19))])] + "" + "") + +(define_insn "*rep_movdi_rex64" [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") @@ -15165,7 +15543,7 @@ (set_attr "memory" "both") (set_attr "mode" "DI")]) -(define_insn "rep_movsi" +(define_insn "*rep_movsi" [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2") @@ -15185,7 +15563,7 @@ (set_attr "memory" "both") (set_attr "mode" "SI")]) -(define_insn "rep_movsi_rex64" +(define_insn "*rep_movsi_rex64" [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") @@ -15205,7 +15583,7 @@ (set_attr "memory" "both") (set_attr "mode" "SI")]) -(define_insn "rep_movqi" +(define_insn "*rep_movqi" [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_operand:SI 3 "register_operand" "0") @@ -15223,7 +15601,7 @@ (set_attr "memory" "both") (set_attr "mode" "SI")]) -(define_insn "rep_movqi_rex64" +(define_insn "*rep_movqi_rex64" [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_operand:DI 3 "register_operand" "0") @@ -15268,120 +15646,40 @@ ;; Most CPUs don't like single string operations ;; Handle this case here to simplify previous expander. -(define_expand "strsetdi_rex64" - [(set (mem:DI (match_operand:DI 0 "register_operand" "")) - (match_operand:DI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 8))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetdi_rex_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) - -(define_expand "strsetsi" - [(set (mem:SI (match_operand:SI 0 "register_operand" "")) - (match_operand:SI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4))) - (clobber (reg:CC 17))])] - "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_strsetsi_rex64 (operands[0], operands[1])); - DONE; - } - else if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetsi_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) - -(define_expand "strsetsi_rex64" - [(set (mem:SI (match_operand:DI 0 "register_operand" "")) - (match_operand:SI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 4))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetsi_rex_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) - -(define_expand "strsethi" - [(set (mem:HI (match_operand:SI 0 "register_operand" "")) - (match_operand:HI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 2))) +(define_expand "strset" + [(set (match_operand 1 "memory_operand" "") + (match_operand 2 "register_operand" "")) + (parallel [(set (match_operand 0 "register_operand" "") + (match_dup 3)) (clobber (reg:CC 17))])] "" { - if (TARGET_64BIT) - { - emit_insn (gen_strsethi_rex64 (operands[0], operands[1])); - DONE; - } - else if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsethi_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) - -(define_expand "strsethi_rex64" - [(set (mem:HI (match_operand:DI 0 "register_operand" "")) - (match_operand:HI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 2))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ + if (GET_MODE (operands[1]) != GET_MODE (operands[2])) + operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0); + + /* If .md ever supports :P for Pmode, this can be directly + in the pattern above. */ + operands[3] = gen_rtx_PLUS (Pmode, operands[0], + GEN_INT (GET_MODE_SIZE (GET_MODE + (operands[2])))); if (TARGET_SINGLE_STRINGOP || optimize_size) { - emit_insn (gen_strsethi_rex_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) - -(define_expand "strsetqi" - [(set (mem:QI (match_operand:SI 0 "register_operand" "")) - (match_operand:QI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) - (clobber (reg:CC 17))])] - "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_strsetqi_rex64 (operands[0], operands[1])); - DONE; - } - else if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetqi_1 (operands[0], operands[0], operands[1])); + emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2], + operands[3])); DONE; } }) -(define_expand "strsetqi_rex64" - [(set (mem:QI (match_operand:DI 0 "register_operand" "")) - (match_operand:QI 1 "register_operand" "")) - (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) - (clobber (reg:CC 17))])] - "TARGET_64BIT" -{ - if (TARGET_SINGLE_STRINGOP || optimize_size) - { - emit_insn (gen_strsetqi_rex_1 (operands[0], operands[0], operands[1])); - DONE; - } -}) +(define_expand "strset_singleop" + [(parallel [(set (match_operand 1 "memory_operand" "") + (match_operand 2 "register_operand" "")) + (set (match_operand 0 "register_operand" "") + (match_operand 3 "" "")) + (use (reg:SI 19))])] + "TARGET_SINGLE_STRINGOP || optimize_size" + "") -(define_insn "strsetdi_rex_1" +(define_insn "*strsetdi_rex_1" [(set (mem:SI (match_operand:DI 1 "register_operand" "0")) (match_operand:SI 2 "register_operand" "a")) (set (match_operand:DI 0 "register_operand" "=D") @@ -15394,7 +15692,7 @@ (set_attr "memory" "store") (set_attr "mode" "DI")]) -(define_insn "strsetsi_1" +(define_insn "*strsetsi_1" [(set (mem:SI (match_operand:SI 1 "register_operand" "0")) (match_operand:SI 2 "register_operand" "a")) (set (match_operand:SI 0 "register_operand" "=D") @@ -15407,7 +15705,7 @@ (set_attr "memory" "store") (set_attr "mode" "SI")]) -(define_insn "strsetsi_rex_1" +(define_insn "*strsetsi_rex_1" [(set (mem:SI (match_operand:DI 1 "register_operand" "0")) (match_operand:SI 2 "register_operand" "a")) (set (match_operand:DI 0 "register_operand" "=D") @@ -15420,7 +15718,7 @@ (set_attr "memory" "store") (set_attr "mode" "SI")]) -(define_insn "strsethi_1" +(define_insn "*strsethi_1" [(set (mem:HI (match_operand:SI 1 "register_operand" "0")) (match_operand:HI 2 "register_operand" "a")) (set (match_operand:SI 0 "register_operand" "=D") @@ -15433,7 +15731,7 @@ (set_attr "memory" "store") (set_attr "mode" "HI")]) -(define_insn "strsethi_rex_1" +(define_insn "*strsethi_rex_1" [(set (mem:HI (match_operand:DI 1 "register_operand" "0")) (match_operand:HI 2 "register_operand" "a")) (set (match_operand:DI 0 "register_operand" "=D") @@ -15446,7 +15744,7 @@ (set_attr "memory" "store") (set_attr "mode" "HI")]) -(define_insn "strsetqi_1" +(define_insn "*strsetqi_1" [(set (mem:QI (match_operand:SI 1 "register_operand" "0")) (match_operand:QI 2 "register_operand" "a")) (set (match_operand:SI 0 "register_operand" "=D") @@ -15459,7 +15757,7 @@ (set_attr "memory" "store") (set_attr "mode" "QI")]) -(define_insn "strsetqi_rex_1" +(define_insn "*strsetqi_rex_1" [(set (mem:QI (match_operand:DI 1 "register_operand" "0")) (match_operand:QI 2 "register_operand" "a")) (set (match_operand:DI 0 "register_operand" "=D") @@ -15472,7 +15770,18 @@ (set_attr "memory" "store") (set_attr "mode" "QI")]) -(define_insn "rep_stosdi_rex64" +(define_expand "rep_stos" + [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0)) + (set (match_operand 0 "register_operand" "") + (match_operand 4 "" "")) + (set (match_operand 2 "memory_operand" "") (const_int 0)) + (use (match_operand 3 "register_operand" "")) + (use (match_dup 1)) + (use (reg:SI 19))])] + "" + "") + +(define_insn "*rep_stosdi_rex64" [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") @@ -15490,7 +15799,7 @@ (set_attr "memory" "store") (set_attr "mode" "DI")]) -(define_insn "rep_stossi" +(define_insn "*rep_stossi" [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1") @@ -15508,7 +15817,7 @@ (set_attr "memory" "store") (set_attr "mode" "SI")]) -(define_insn "rep_stossi_rex64" +(define_insn "*rep_stossi_rex64" [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") @@ -15526,7 +15835,7 @@ (set_attr "memory" "store") (set_attr "mode" "SI")]) -(define_insn "rep_stosqi" +(define_insn "*rep_stosqi" [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) (set (match_operand:SI 0 "register_operand" "=D") (plus:SI (match_operand:SI 3 "register_operand" "0") @@ -15543,7 +15852,7 @@ (set_attr "memory" "store") (set_attr "mode" "QI")]) -(define_insn "rep_stosqi_rex64" +(define_insn "*rep_stosqi_rex64" [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_operand:DI 3 "register_operand" "0") @@ -15552,7 +15861,7 @@ (const_int 0)) (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4)) - (use (reg:DI 19))] + (use (reg:SI 19))] "TARGET_64BIT" "{rep\;stosb|rep stosb}" [(set_attr "type" "str") @@ -15566,17 +15875,25 @@ (match_operand:BLK 2 "general_operand" ""))) (use (match_operand 3 "general_operand" "")) (use (match_operand 4 "immediate_operand" ""))] - "" + "! optimize_size || TARGET_INLINE_ALL_STRINGOPS" { rtx addr1, addr2, out, outlow, count, countreg, align; + /* Can't use this if the user has appropriated esi or edi. */ + if (global_regs[4] || global_regs[5]) + FAIL; + out = operands[0]; if (GET_CODE (out) != REG) out = gen_reg_rtx (SImode); addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0)); - + if (addr1 != XEXP (operands[1], 0)) + operands[1] = replace_equiv_address_nv (operands[1], addr1); + if (addr2 != XEXP (operands[2], 0)) + operands[2] = replace_equiv_address_nv (operands[2], addr2); + count = operands[3]; countreg = ix86_zero_extend_to_Pmode (count); @@ -15593,27 +15910,17 @@ emit_move_insn (operands[0], const0_rtx); DONE; } - if (TARGET_64BIT) - emit_insn (gen_cmpstrqi_nz_rex_1 (addr1, addr2, countreg, align, - addr1, addr2, countreg)); - else - emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align, - addr1, addr2, countreg)); + emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); } else { if (TARGET_64BIT) - { - emit_insn (gen_cmpdi_1_rex64 (countreg, countreg)); - emit_insn (gen_cmpstrqi_rex_1 (addr1, addr2, countreg, align, - addr1, addr2, countreg)); - } + emit_insn (gen_cmpdi_1_rex64 (countreg, countreg)); else - { - emit_insn (gen_cmpsi_1 (countreg, countreg)); - emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align, - addr1, addr2, countreg)); - } + emit_insn (gen_cmpsi_1 (countreg, countreg)); + emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); } outlow = gen_lowpart (QImode, out); @@ -15644,7 +15951,20 @@ ;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is ;; zero. Emit extra code to make sure that a zero-length compare is EQ. -(define_insn "cmpstrqi_nz_1" +(define_expand "cmpstrqi_nz_1" + [(parallel [(set (reg:CC 17) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" ""))) + (use (match_operand 2 "register_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:SI 19)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "") + +(define_insn "*cmpstrqi_nz_1" [(set (reg:CC 17) (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) (mem:BLK (match_operand:SI 5 "register_operand" "1")))) @@ -15660,7 +15980,7 @@ (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -(define_insn "cmpstrqi_nz_rex_1" +(define_insn "*cmpstrqi_nz_rex_1" [(set (reg:CC 17) (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) (mem:BLK (match_operand:DI 5 "register_operand" "1")))) @@ -15678,7 +15998,23 @@ ;; The same, but the count is not known to not be zero. -(define_insn "cmpstrqi_1" +(define_expand "cmpstrqi_1" + [(parallel [(set (reg:CC 17) + (if_then_else:CC (ne (match_operand 2 "register_operand" "") + (const_int 0)) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" "")) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:CC 17)) + (use (reg:SI 19)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "") + +(define_insn "*cmpstrqi_1" [(set (reg:CC 17) (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2") (const_int 0)) @@ -15697,7 +16033,7 @@ (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -(define_insn "cmpstrqi_rex_1" +(define_insn "*cmpstrqi_rex_1" [(set (reg:CC 17) (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2") (const_int 0)) @@ -15742,7 +16078,15 @@ FAIL; }) -(define_insn "strlenqi_1" +(define_expand "strlenqi_1" + [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" "")) + (use (reg:SI 19)) + (clobber (match_operand 1 "register_operand" "")) + (clobber (reg:CC 17))])] + "" + "") + +(define_insn "*strlenqi_1" [(set (match_operand:SI 0 "register_operand" "=&c") (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1")) (match_operand:QI 2 "register_operand" "a") @@ -15757,7 +16101,7 @@ (set_attr "mode" "QI") (set_attr "prefix_rep" "1")]) -(define_insn "strlenqi_rex_1" +(define_insn "*strlenqi_rex_1" [(set (match_operand:DI 0 "register_operand" "=&c") (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1")) (match_operand:QI 2 "register_operand" "a") @@ -15869,7 +16213,7 @@ (define_insn "x86_movdicc_0_m1_rex64" [(set (match_operand:DI 0 "register_operand" "=r") - (if_then_else:DI (ltu (reg:CC 17) (const_int 0)) + (if_then_else:DI (match_operand 1 "ix86_carry_flag_operator" "") (const_int -1) (const_int 0))) (clobber (reg:CC 17))] @@ -15884,7 +16228,7 @@ (set_attr "mode" "DI") (set_attr "length_immediate" "0")]) -(define_insn "*movdicc_c_rex64" +(define_insn "movdicc_c_rex64" [(set (match_operand:DI 0 "register_operand" "=r,r") (if_then_else:DI (match_operator 1 "ix86_comparison_operator" [(reg 17) (const_int 0)]) @@ -15912,7 +16256,7 @@ (define_insn "x86_movsicc_0_m1" [(set (match_operand:SI 0 "register_operand" "=r") - (if_then_else:SI (ltu (reg:CC 17) (const_int 0)) + (if_then_else:SI (match_operand 1 "ix86_carry_flag_operator" "") (const_int -1) (const_int 0))) (clobber (reg:CC 17))] @@ -15944,9 +16288,9 @@ (define_expand "movhicc" [(set (match_operand:HI 0 "register_operand" "") (if_then_else:HI (match_operand 1 "comparison_operator" "") - (match_operand:HI 2 "nonimmediate_operand" "") - (match_operand:HI 3 "nonimmediate_operand" "")))] - "TARGET_CMOVE && TARGET_HIMODE_MATH" + (match_operand:HI 2 "general_operand" "") + (match_operand:HI 3 "general_operand" "")))] + "TARGET_HIMODE_MATH" "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") (define_insn "*movhicc_noc" @@ -15963,6 +16307,33 @@ [(set_attr "type" "icmov") (set_attr "mode" "HI")]) +(define_expand "movqicc" + [(set (match_operand:QI 0 "register_operand" "") + (if_then_else:QI (match_operand 1 "comparison_operator" "") + (match_operand:QI 2 "general_operand" "") + (match_operand:QI 3 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") + +(define_insn_and_split "*movqicc_noc" + [(set (match_operand:QI 0 "register_operand" "=r,r") + (if_then_else:QI (match_operator 1 "ix86_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") (const_int 0)]) + (match_operand:QI 2 "register_operand" "r,0") + (match_operand:QI 3 "register_operand" "0,r")))] + "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL" + "#" + "&& reload_completed" + [(set (match_dup 0) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 2) + (match_dup 3)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]);" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) + (define_expand "movsfcc" [(set (match_operand:SF 0 "register_operand" "") (if_then_else:SF (match_operand 1 "comparison_operator" "") @@ -15972,11 +16343,11 @@ "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") (define_insn "*movsfcc_1" - [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") + [(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f") (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" [(reg 17) (const_int 0)]) - (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0") - (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))] + (match_operand:SF 2 "nonimmediate_operand" "f#r,0,rm#f,0") + (match_operand:SF 3 "nonimmediate_operand" "0,f#r,0,rm#f")))] "TARGET_CMOVE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" "@ @@ -15996,11 +16367,11 @@ "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") (define_insn "*movdfcc_1" - [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") + [(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" [(reg 17) (const_int 0)]) - (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") - (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + (match_operand:DF 2 "nonimmediate_operand" "f#r,0,rm#f,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f#r,0,rm#f")))] "!TARGET_64BIT && TARGET_CMOVE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" "@ @@ -16012,11 +16383,11 @@ (set_attr "mode" "DF")]) (define_insn "*movdfcc_1_rex64" - [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") + [(set (match_operand:DF 0 "register_operand" "=f#r,f#r,r#f,r#f") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" [(reg 17) (const_int 0)]) - (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") - (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + (match_operand:DF 2 "nonimmediate_operand" "f#r,0#r,rm#f,0#f") + (match_operand:DF 3 "nonimmediate_operand" "0#r,f#r,0#f,rm#f")))] "TARGET_64BIT && TARGET_CMOVE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)" "@ @@ -16030,7 +16401,7 @@ (define_split [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" - [(match_operand 4 "" "") (const_int 0)]) + [(match_operand 4 "flags_reg_operand" "") (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand" "") (match_operand:DF 3 "nonimmediate_operand" "")))] "!TARGET_64BIT && reload_completed" @@ -16051,14 +16422,6 @@ (if_then_else:XF (match_operand 1 "comparison_operator" "") (match_operand:XF 2 "register_operand" "") (match_operand:XF 3 "register_operand" "")))] - "!TARGET_64BIT && TARGET_CMOVE" - "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") - -(define_expand "movtfcc" - [(set (match_operand:TF 0 "register_operand" "") - (if_then_else:TF (match_operand 1 "comparison_operator" "") - (match_operand:TF 2 "register_operand" "") - (match_operand:TF 3 "register_operand" "")))] "TARGET_CMOVE" "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") @@ -16068,19 +16431,6 @@ [(reg 17) (const_int 0)]) (match_operand:XF 2 "register_operand" "f,0") (match_operand:XF 3 "register_operand" "0,f")))] - "!TARGET_64BIT && TARGET_CMOVE" - "@ - fcmov%F1\t{%2, %0|%0, %2} - fcmov%f1\t{%3, %0|%0, %3}" - [(set_attr "type" "fcmov") - (set_attr "mode" "XF")]) - -(define_insn "*movtfcc_1" - [(set (match_operand:TF 0 "register_operand" "=f,f") - (if_then_else:TF (match_operator 1 "fcmov_comparison_operator" - [(reg 17) (const_int 0)]) - (match_operand:TF 2 "register_operand" "f,0") - (match_operand:TF 3 "register_operand" "0,f")))] "TARGET_CMOVE" "@ fcmov%F1\t{%2, %0|%0, %2} @@ -16138,6 +16488,39 @@ (match_dup 1) (match_dup 2)))]) +;; Conditional addition patterns +(define_expand "addqicc" + [(match_operand:QI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:QI 2 "register_operand" "") + (match_operand:QI 3 "const_int_operand" "")] + "" + "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + +(define_expand "addhicc" + [(match_operand:HI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:HI 2 "register_operand" "") + (match_operand:HI 3 "const_int_operand" "")] + "" + "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + +(define_expand "addsicc" + [(match_operand:SI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" + "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + +(define_expand "adddicc" + [(match_operand:DI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "register_operand" "") + (match_operand:DI 3 "const_int_operand" "")] + "TARGET_64BIT" + "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;") + ;; We can't represent the LT test directly. Do this by swapping the operands. (define_split @@ -16236,7 +16619,7 @@ && operands_match_p (operands[2], operands[3])))" [(set (reg:CCFP 17) (compare:CCFP (match_dup 2) - (match_dup 2))) + (match_dup 1))) (set (match_dup 0) (if_then_else:DF (ge (reg:CCFP 17) (const_int 0)) (match_dup 1) @@ -16426,23 +16809,7 @@ ;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))] ;; ;; in proper program order. -(define_expand "pro_epilogue_adjust_stack" - [(parallel [(set (match_operand:SI 0 "register_operand" "=r,r") - (plus:SI (match_operand:SI 1 "register_operand" "0,r") - (match_operand:SI 2 "immediate_operand" "i,i"))) - (clobber (reg:CC 17)) - (clobber (mem:BLK (scratch)))])] - "" -{ - if (TARGET_64BIT) - { - emit_insn (gen_pro_epilogue_adjust_stack_rex64 - (operands[0], operands[1], operands[2])); - DONE; - } -}) - -(define_insn "*pro_epilogue_adjust_stack_1" +(define_insn "pro_epilogue_adjust_stack_1" [(set (match_operand:SI 0 "register_operand" "=r,r") (plus:SI (match_operand:SI 1 "register_operand" "0,r") (match_operand:SI 2 "immediate_operand" "i,i"))) @@ -16498,6 +16865,8 @@ case TYPE_ALU: if (GET_CODE (operands[2]) == CONST_INT + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) && (INTVAL (operands[2]) == 128 || (INTVAL (operands[2]) < 0 && INTVAL (operands[2]) != -128))) @@ -16524,6 +16893,30 @@ (const_string "lea"))) (set_attr "mode" "DI")]) +(define_insn "pro_epilogue_adjust_stack_rex64_2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "0,r") + (match_operand:DI 3 "immediate_operand" "i,i"))) + (use (match_operand:DI 2 "register_operand" "r,r")) + (clobber (reg:CC 17)) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + return "add{q}\t{%2, %0|%0, %2}"; + + case TYPE_LEA: + operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]); + return "lea{q}\t{%a2, %0|%0, %a2}"; + + default: + abort (); + } +} + [(set_attr "type" "alu,lea") + (set_attr "mode" "DI")]) ;; Placeholder for the conditional moves. This one is split either to SSE ;; based moves emulation or to usual cmove sequence. Little bit unfortunate @@ -16544,6 +16937,12 @@ (clobber (reg:CC 17))] "TARGET_SSE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + /* Avoid combine from being smart and converting min/max + instruction patterns into conditional moves. */ + && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT + && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) + || !rtx_equal_p (operands[4], operands[2]) + || !rtx_equal_p (operands[5], operands[3])) && (!TARGET_IEEE_FP || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" "#") @@ -16571,6 +16970,12 @@ (clobber (reg:CC 17))] "TARGET_SSE2 && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + /* Avoid combine from being smart and converting min/max + instruction patterns into conditional moves. */ + && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT + && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) + || !rtx_equal_p (operands[4], operands[2]) + || !rtx_equal_p (operands[5], operands[3])) && (!TARGET_IEEE_FP || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" "#") @@ -16609,7 +17014,7 @@ DONE; }) -;; Split SSE based conditional move into seqence: +;; Split SSE based conditional move into sequence: ;; cmpCC op0, op4 - set op0 to 0 or ffffffff depending on the comparison ;; and op2, op0 - zero op2 if comparison was false ;; nand op0, op3 - load op3 to op0 if comparison was false @@ -16632,10 +17037,22 @@ (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0) (subreg:TI (match_dup 7) 0)))] { - /* If op2 == op3, op3 will be clobbered before it is used. - This should be optimized out though. */ + if (GET_MODE (operands[2]) == DFmode + && TARGET_SSE_PARTIAL_REGS && !optimize_size) + { + rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } + + /* If op2 == op3, op3 would be clobbered before it is used. */ if (operands_match_p (operands[2], operands[3])) - abort (); + { + emit_move_insn (operands[0], operands[2]); + DONE; + } + PUT_MODE (operands[1], GET_MODE (operands[0])); if (operands_match_p (operands[0], operands[4])) operands[6] = operands[4], operands[7] = operands[2]; @@ -16643,7 +17060,7 @@ operands[6] = operands[2], operands[7] = operands[4]; }) -;; Special case of conditional move we can handle effectivly. +;; Special case of conditional move we can handle effectively. ;; Do not brother with the integer/floating point case, since these are ;; bot considerably slower, unlike in the generic case. (define_insn "*sse_movsfcc_const0_1" @@ -16738,8 +17155,22 @@ || const0_operand (operands[3], GET_MODE (operands[0])))" [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)])) (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6) - (subreg:TI (match_dup 7) 0)))] + (match_dup 7)))] { + if (TARGET_SSE_PARTIAL_REGS && !optimize_size + && GET_MODE (operands[2]) == DFmode) + { + if (REG_P (operands[2])) + { + rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } + if (REG_P (operands[3])) + { + rtx op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } + } PUT_MODE (operands[1], GET_MODE (operands[0])); if (!sse_comparison_operator (operands[1], VOIDmode) || !rtx_equal_p (operands[0], operands[4])) @@ -16762,39 +17193,71 @@ operands[7] = operands[2]; operands[6] = gen_rtx_SUBREG (TImode, operands[0], 0); } + operands[7] = simplify_gen_subreg (TImode, operands[7], + GET_MODE (operands[7]), 0); }) (define_expand "allocate_stack_worker" [(match_operand:SI 0 "register_operand" "")] "TARGET_STACK_PROBE" { - if (TARGET_64BIT) - emit_insn (gen_allocate_stack_worker_rex64 (operands[0])); + if (reload_completed) + { + if (TARGET_64BIT) + emit_insn (gen_allocate_stack_worker_rex64_postreload (operands[0])); + else + emit_insn (gen_allocate_stack_worker_postreload (operands[0])); + } else - emit_insn (gen_allocate_stack_worker_1 (operands[0])); + { + if (TARGET_64BIT) + emit_insn (gen_allocate_stack_worker_rex64 (operands[0])); + else + emit_insn (gen_allocate_stack_worker_1 (operands[0])); + } DONE; }) (define_insn "allocate_stack_worker_1" - [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] UNSPEC_STACK_PROBE) + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")] + UNSPECV_STACK_PROBE) (set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 0))) - (clobber (match_dup 0)) + (clobber (match_scratch:SI 1 "=0")) (clobber (reg:CC 17))] "!TARGET_64BIT && TARGET_STACK_PROBE" "call\t__alloca" [(set_attr "type" "multi") (set_attr "length" "5")]) +(define_expand "allocate_stack_worker_postreload" + [(parallel [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")] + UNSPECV_STACK_PROBE) + (set (reg:SI 7) (minus:SI (reg:SI 7) (match_dup 0))) + (clobber (match_dup 0)) + (clobber (reg:CC 17))])] + "" + "") + (define_insn "allocate_stack_worker_rex64" - [(unspec:DI [(match_operand:DI 0 "register_operand" "a")] UNSPEC_STACK_PROBE) + [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")] + UNSPECV_STACK_PROBE) (set (reg:DI 7) (minus:DI (reg:DI 7) (match_dup 0))) - (clobber (match_dup 0)) + (clobber (match_scratch:DI 1 "=0")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_STACK_PROBE" "call\t__alloca" [(set_attr "type" "multi") (set_attr "length" "5")]) +(define_expand "allocate_stack_worker_rex64_postreload" + [(parallel [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")] + UNSPECV_STACK_PROBE) + (set (reg:DI 7) (minus:DI (reg:DI 7) (match_dup 0))) + (clobber (match_dup 0)) + (clobber (reg:CC 17))])] + "" + "") + (define_expand "allocate_stack" [(parallel [(set (match_operand:SI 0 "register_operand" "=r") (minus:SI (reg:SI 7) @@ -17025,7 +17488,7 @@ [(parallel [(set (match_dup 2) (const_int 0)) (clobber (reg:CC 17))]) (set (match_dup 0) (match_dup 1))] - "operands[2] = gen_rtx_REG (SImode, true_regnum (operands[1]));") + "operands[2] = gen_lowpart (SImode, operands[1]);") (define_peephole2 [(match_scratch:QI 1 "q") @@ -17039,7 +17502,7 @@ [(parallel [(set (match_dup 2) (const_int 0)) (clobber (reg:CC 17))]) (set (match_dup 0) (match_dup 1))] - "operands[2] = gen_rtx_REG (SImode, true_regnum (operands[1]));") + "operands[2] = gen_lowpart (SImode, operands[1]);") (define_peephole2 [(match_scratch:SI 2 "r") @@ -17085,7 +17548,7 @@ ;; NOT is not pairable on Pentium, while XOR is, but one byte longer. ;; Don't split NOTs with a displacement operand, because resulting XOR -;; will not be pariable anyway. +;; will not be pairable anyway. ;; ;; On AMD K6, NOT is vector decoded with memory operand that can not be ;; represented using a modRM byte. The XOR replacement is long decoded, @@ -17294,8 +17757,8 @@ && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int 0)) (clobber (reg:CC 17))])] - "operands[0] = gen_rtx_REG (GET_MODE (operands[0]) == DImode ? DImode : SImode, - true_regnum (operands[0]));") + "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode, + operands[0]);") (define_peephole2 [(set (strict_low_part (match_operand 0 "register_operand" "")) @@ -17318,8 +17781,8 @@ && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int -1)) (clobber (reg:CC 17))])] - "operands[0] = gen_rtx_REG (GET_MODE (operands[0]) == DImode ? DImode : SImode, - true_regnum (operands[0]));") + "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode, + operands[0]);") ;; Attempt to convert simple leas to adds. These can be created by ;; move expanders. @@ -17693,6 +18156,102 @@ (set (reg:DI 7) (plus:DI (reg:DI 7) (const_int 8)))])] "") +;; Imul $32bit_imm, mem, reg is vector decoded, while +;; imul $32bit_imm, reg, reg is direct decoded. +(define_peephole2 + [(match_scratch:DI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "memory_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_K8 && !optimize_size + && (GET_CODE (operands[2]) != CONST_INT + || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2))) + (clobber (reg:CC 17))])] +"") + +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "immediate_operand" ""))) + (clobber (reg:CC 17))])] + "TARGET_K8 && !optimize_size + && (GET_CODE (operands[2]) != CONST_INT + || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2))) + (clobber (reg:CC 17))])] +"") + +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "immediate_operand" "")))) + (clobber (reg:CC 17))])] + "TARGET_K8 && !optimize_size + && (GET_CODE (operands[2]) != CONST_INT + || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) + (clobber (reg:CC 17))])] +"") + +;; imul $8/16bit_imm, regmem, reg is vector decoded. +;; Convert it into imul reg, reg +;; It would be better to force assembler to encode instruction using long +;; immediate, but there is apparently no way to do so. +(define_peephole2 + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "const_int_operand" ""))) + (clobber (reg:CC 17))]) + (match_scratch:DI 3 "r")] + "TARGET_K8 && !optimize_size + && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC 17))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + +(define_peephole2 + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC 17))]) + (match_scratch:SI 3 "r")] + "TARGET_K8 && !optimize_size + && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) + (clobber (reg:CC 17))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + +(define_peephole2 + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "immediate_operand" ""))) + (clobber (reg:CC 17))]) + (match_scratch:HI 3 "r")] + "TARGET_K8 && !optimize_size" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) + (clobber (reg:CC 17))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + ;; Call-value patterns last so that the wildcard operand does not ;; disrupt insn-recog's switch tables. @@ -17763,19 +18322,23 @@ [(set (match_operand 0 "" "") (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm")) (match_operand:SI 2 "" "")))] - "!TARGET_64BIT" + "!SIBLING_CALL_P (insn) && !TARGET_64BIT" { if (constant_call_address_operand (operands[1], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%*%1"; - else - return "call\t%*%1"; + return "call\t%P1"; + return "call\t%*%1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,c,d,a")) + (match_operand:SI 2 "" "")))] + "SIBLING_CALL_P (insn) && !TARGET_64BIT" +{ + if (constant_call_address_operand (operands[1], QImode)) + return "jmp\t%P1"; + return "jmp\t%*%1"; } [(set_attr "type" "callv")]) @@ -17783,21 +18346,29 @@ [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) (match_operand:DI 2 "" "")))] - "TARGET_64BIT" + "!SIBLING_CALL_P (insn) && TARGET_64BIT" { if (constant_call_address_operand (operands[1], QImode)) - { - if (SIBLING_CALL_P (insn)) - return "jmp\t%P1"; - else - return "call\t%P1"; - } - if (SIBLING_CALL_P (insn)) - return "jmp\t%A1"; - else - return "call\t%A1"; + return "call\t%P1"; + return "call\t%A1"; } [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "" "")))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t%P1" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1_rex64_v" + [(set (match_operand 0 "" "") + (call (mem:QI (reg:DI 40)) + (match_operand:DI 1 "" "")))] + "SIBLING_CALL_P (insn) && TARGET_64BIT" + "jmp\t*%%r11" + [(set_attr "type" "callv")]) (define_insn "trap" [(trap_if (const_int 1) (const_int 5))] @@ -17842,7 +18413,7 @@ { operands[2] = gen_label_rtx (); output_asm_insn ("j%c0\t%l2\; int\t%1", operands); - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (operands[2])); RET; }) @@ -17855,37 +18426,120 @@ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" - ;; @@@ let's try to use movaps here. "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) +(define_split + [(set (match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE" + [(set (match_dup 0) + (vec_merge:V4SF + (vec_duplicate:V4SF (match_dup 1)) + (match_dup 2) + (const_int 1)))] +{ + operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); + operands[2] = CONST0_RTX (V4SFmode); +}) + (define_insn "movv4si_internal" [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m") (match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" - ;; @@@ let's try to use movaps here. - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "movv2di_internal" [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m") (match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" - ;; @@@ let's try to use movaps here. - "@ - pxor\t%0, %0 - movdqa\t{%1, %0|%0, %1} - movdqa\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) + +(define_split + [(set (match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE2" + [(set (match_dup 0) + (vec_merge:V2DF + (vec_duplicate:V2DF (match_dup 1)) + (match_dup 2) + (const_int 1)))] +{ + operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); + operands[2] = CONST0_RTX (V2DFmode); +}) (define_insn "movv8qi_internal" [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m") @@ -17947,41 +18601,140 @@ DONE; }) +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_64BIT" +{ + if (TARGET_64BIT) + ix86_expand_move (TFmode, operands); + else + ix86_expand_vector_move (TFmode, operands); + DONE; +}) + (define_insn "movv2df_internal" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE2 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - xorpd\t%0, %0 - movapd\t{%1, %0|%0, %1} - movapd\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "xorpd\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movapd\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "ssemov") - (set_attr "mode" "V2DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "V2DF")) + (eq_attr "alternative" "2") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "V2DF"))] + (const_string "V2DF")))]) (define_insn "movv8hi_internal" [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m") (match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE2 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "movv16qi_internal" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))] + (match_operand:V16QI 1 "nonimmediate_operand" "C,xm,x"))] "TARGET_SSE2 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_expand "movv2df" [(set (match_operand:V2DF 0 "nonimmediate_operand" "") @@ -18073,6 +18826,12 @@ DONE; }) +(define_insn "*pushti" + [(set (match_operand:TI 0 "push_operand" "=<") + (match_operand:TI 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + (define_insn "*pushv2df" [(set (match_operand:V2DF 0 "push_operand" "=<") (match_operand:V2DF 1 "register_operand" "x"))] @@ -18156,152 +18915,132 @@ operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") -(define_insn_and_split "*pushti" - [(set (match_operand:TI 0 "push_operand" "=<") - (match_operand:TI 1 "nonmemory_operand" "x"))] - "TARGET_SSE" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:TI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "multi")]) - -(define_insn_and_split "*pushv2df" - [(set (match_operand:V2DF 0 "push_operand" "=<") - (match_operand:V2DF 1 "nonmemory_operand" "x"))] - "TARGET_SSE2" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:V2DF (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "multi")]) - -(define_insn_and_split "*pushv2di" - [(set (match_operand:V2DI 0 "push_operand" "=<") - (match_operand:V2DI 1 "nonmemory_operand" "x"))] - "TARGET_SSE2" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:V2DI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "multi")]) - -(define_insn_and_split "*pushv8hi" - [(set (match_operand:V8HI 0 "push_operand" "=<") - (match_operand:V8HI 1 "nonmemory_operand" "x"))] - "TARGET_SSE2" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:V8HI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "multi")]) - -(define_insn_and_split "*pushv16qi" - [(set (match_operand:V16QI 0 "push_operand" "=<") - (match_operand:V16QI 1 "nonmemory_operand" "x"))] - "TARGET_SSE2" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:V16QI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "multi")]) - -(define_insn_and_split "*pushv4sf" - [(set (match_operand:V4SF 0 "push_operand" "=<") - (match_operand:V4SF 1 "nonmemory_operand" "x"))] - "TARGET_SSE" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:V4SF (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "multi")]) - -(define_insn_and_split "*pushv4si" - [(set (match_operand:V4SI 0 "push_operand" "=<") - (match_operand:V4SI 1 "nonmemory_operand" "x"))] - "TARGET_SSE" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) - (set (mem:V4SI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "multi")]) - -(define_insn_and_split "*pushv2si" - [(set (match_operand:V2SI 0 "push_operand" "=<") - (match_operand:V2SI 1 "nonmemory_operand" "y"))] - "TARGET_MMX" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:V2SI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "mmx")]) - -(define_insn_and_split "*pushv4hi" - [(set (match_operand:V4HI 0 "push_operand" "=<") - (match_operand:V4HI 1 "nonmemory_operand" "y"))] - "TARGET_MMX" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:V4HI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "mmx")]) - -(define_insn_and_split "*pushv8qi" - [(set (match_operand:V8QI 0 "push_operand" "=<") - (match_operand:V8QI 1 "nonmemory_operand" "y"))] - "TARGET_MMX" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:V8QI (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "mmx")]) - -(define_insn_and_split "*pushv2sf" - [(set (match_operand:V2SF 0 "push_operand" "=<") - (match_operand:V2SF 1 "nonmemory_operand" "y"))] - "TARGET_3DNOW" - "#" - "" - [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) - (set (mem:V2SF (reg:SI 7)) (match_dup 1))] - "" - [(set_attr "type" "mmx")]) - (define_insn "movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE && !TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "ssemov,ssemov,ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "*movti_rex64" - [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - # - # - xorps\t%0, %0 - movaps\\t{%1, %0|%0, %1} - movaps\\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "*,*,ssemov,ssemov,ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_insn "*movtf_rex64" + [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} + [(set_attr "type" "*,*,ssemov,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -18311,6 +19050,14 @@ [(const_int 0)] "ix86_split_long_move (operands); DONE;") +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "reload_completed && !SSE_REG_P (operands[0]) + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + ;; These two patterns are useful for specifying exactly whether to use ;; movaps or movups (define_expand "sse_movaps" @@ -18690,14 +19437,14 @@ ;; on integral types. We deal with this by representing the floating point ;; logical as logical on arguments casted to TImode as this is what hardware ;; really does. Unfortunately hardware requires the type information to be -;; present and thus we must avoid subregs from being simplified and elliminated +;; present and thus we must avoid subregs from being simplified and eliminated ;; in later compilation phases. ;; ;; We have following variants from each instruction: ;; sse_andsf3 - the operation taking V4SF vector operands ;; and doing TImode cast on them ;; *sse_andsf3_memory - the operation taking one memory operand casted to -;; TImode, since backend insist on elliminating casts +;; TImode, since backend insist on eliminating casts ;; on memory operands ;; sse_andti3_sf_1 - the operation taking SF scalar operands. ;; We can not accept memory operand here as instruction reads @@ -18705,7 +19452,7 @@ ;; scalar float operations that expands to logicals (fabs) ;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode ;; memory operand. Eventually combine can be able -;; to synthetize these using splitter. +;; to synthesize these using splitter. ;; sse2_anddf3, *sse2_anddf3_memory ;; ;; @@ -18963,7 +19710,7 @@ (define_insn "sse2_nandv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0")) + (and:V2DI (not:V2DI (match_operand:V2DI 1 "register_operand" "0")) (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" @@ -19015,12 +19762,26 @@ ;; this insn. (define_insn "sse_clrv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(const_int 0)] UNSPEC_NOP))] + (match_operand:V4SF 1 "const0_operand" "X"))] "TARGET_SSE" - "xorps\t{%0, %0|%0, %0}" +{ + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t{%0, %0|%0, %0}"; + else + return "xorps\t{%0, %0|%0, %0}"; +} [(set_attr "type" "sselog") (set_attr "memory" "none") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")))]) ;; Use xor, but don't show input operands so they aren't live before ;; this insn. @@ -19103,7 +19864,7 @@ (parallel [(const_int 0)]))))] "TARGET_SSE" "comiss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecmp") + [(set_attr "type" "ssecomi") (set_attr "mode" "SF")]) (define_insn "sse_ucomi" @@ -19116,7 +19877,7 @@ (parallel [(const_int 0)]))))] "TARGET_SSE" "ucomiss\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecmp") + [(set_attr "type" "ssecomi") (set_attr "mode" "SF")]) @@ -19241,15 +20002,16 @@ (set_attr "mode" "SF")]) (define_insn "cvtsi2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x") + [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_merge:V4SF - (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 1 "register_operand" "0,0") (vec_duplicate:V4SF - (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,rm"))) (const_int 14)))] "TARGET_SSE" "cvtsi2ss\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "vector,double") (set_attr "mode" "SF")]) (define_insn "cvtsi2ssq" @@ -19261,19 +20023,20 @@ (const_int 14)))] "TARGET_SSE && TARGET_64BIT" "cvtsi2ssq\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "athlon_decode" "vector,vector") + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "vector,double") (set_attr "mode" "SF")]) (define_insn "cvtss2si" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "=r,r") (vec_select:SI - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) (parallel [(const_int 0)])))] "TARGET_SSE" "cvtss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "SF")]) + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "SI")]) (define_insn "cvtss2siq" [(set (match_operand:DI 0 "register_operand" "=r,r") @@ -19282,20 +20045,21 @@ (parallel [(const_int 0)])))] "TARGET_SSE" "cvtss2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "athlon_decode" "vector,vector") - (set_attr "mode" "SF")]) + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "DI")]) (define_insn "cvttss2si" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "=r,r") (vec_select:SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] UNSPEC_FIX) (parallel [(const_int 0)])))] "TARGET_SSE" "cvttss2si\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "SF")]) + [(set_attr "type" "sseicvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "double,vector")]) (define_insn "cvttss2siq" [(set (match_operand:DI 0 "register_operand" "=r,r") @@ -19305,9 +20069,9 @@ (parallel [(const_int 0)])))] "TARGET_SSE && TARGET_64BIT" "cvttss2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "sseicvt") (set_attr "mode" "SF") - (set_attr "athlon_decode" "vector,vector")]) + (set_attr "athlon_decode" "double,vector")]) ;; MMX insns @@ -19639,7 +20403,7 @@ (vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0") (vec_duplicate:V4HI (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm"))) - (match_operand:SI 3 "immediate_operand" "i")))] + (match_operand:SI 3 "const_0_to_15_operand" "N")))] "TARGET_SSE || TARGET_3DNOW_A" "pinsrw\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "mmxcvt") @@ -19649,7 +20413,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y") (parallel - [(match_operand:SI 2 "immediate_operand" "i")]))))] + [(match_operand:SI 2 "const_0_to_3_operand" "N")]))))] "TARGET_SSE || TARGET_3DNOW_A" "pextrw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "mmxcvt") @@ -20100,7 +20864,7 @@ output_asm_insn (\"rex\", operands); output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands); } - ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\", + (*targetm.asm_out.internal_label) (asm_out_file, \"L\", CODE_LABEL_NUMBER (operands[3])); RET; } @@ -20330,7 +21094,7 @@ [(set_attr "type" "mmxshft") (set_attr "mode" "TI")]) -;; 3DNow reciprical and sqrt +;; 3DNow reciprocal and sqrt (define_insn "pfrcpv2sf2" [(set (match_operand:V2SF 0 "register_operand" "=y") @@ -20724,7 +21488,7 @@ (parallel [(const_int 0)]))))] "TARGET_SSE2" "comisd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecmp") + [(set_attr "type" "ssecomi") (set_attr "mode" "DF")]) (define_insn "sse2_ucomi" @@ -20737,7 +21501,7 @@ (parallel [(const_int 0)]))))] "TARGET_SSE2" "ucomisd\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecmp") + [(set_attr "type" "ssecomi") (set_attr "mode" "DF")]) ;; SSE Strange Moves. @@ -20901,31 +21665,34 @@ ;; Conversions between SI and DF (define_insn "cvtsd2si" - [(set (match_operand:SI 0 "register_operand" "=r") - (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m") (parallel [(const_int 0)]))))] "TARGET_SSE2" "cvtsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") (set_attr "mode" "SI")]) (define_insn "cvtsd2siq" - [(set (match_operand:DI 0 "register_operand" "=r") - (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m") (parallel [(const_int 0)]))))] "TARGET_SSE2 && TARGET_64BIT" "cvtsd2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "SI")]) + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "mode" "DI")]) (define_insn "cvttsd2si" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") (parallel [(const_int 0)]))] UNSPEC_FIX))] "TARGET_SSE2" "cvttsd2si\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "SI")]) + [(set_attr "type" "sseicvt") + (set_attr "mode" "SI") + (set_attr "athlon_decode" "double,vector")]) (define_insn "cvttsd2siq" [(set (match_operand:DI 0 "register_operand" "=r,r") @@ -20933,21 +21700,22 @@ (parallel [(const_int 0)]))] UNSPEC_FIX))] "TARGET_SSE2 && TARGET_64BIT" "cvttsd2siq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "sseicvt") (set_attr "mode" "DI") - (set_attr "athlon_decode" "vector,vector")]) + (set_attr "athlon_decode" "double,vector")]) (define_insn "cvtsi2sd" - [(set (match_operand:V2DF 0 "register_operand" "=x") - (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") (vec_duplicate:V2DF (float:DF - (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (match_operand:SI 2 "nonimmediate_operand" "r,rm"))) (const_int 2)))] "TARGET_SSE2" "cvtsi2sd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) + [(set_attr "type" "sseicvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "double,direct")]) (define_insn "cvtsi2sdq" [(set (match_operand:V2DF 0 "register_operand" "=x,x") @@ -20958,22 +21726,23 @@ (const_int 2)))] "TARGET_SSE2 && TARGET_64BIT" "cvtsi2sdq\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "sseicvt") (set_attr "mode" "DF") - (set_attr "athlon_decode" "vector,direct")]) + (set_attr "athlon_decode" "double,direct")]) ;; Conversions between SF and DF (define_insn "cvtsd2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0") + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0,0") (vec_duplicate:V4SF (float_truncate:V2SF - (match_operand:V2DF 2 "register_operand" "xm"))) + (match_operand:V2DF 2 "nonimmediate_operand" "x,xm"))) (const_int 14)))] "TARGET_SSE2" "cvtsd2ss\t{%2, %0|%0, %2}" [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,double") (set_attr "mode" "SF")]) (define_insn "cvtss2sd" @@ -20981,7 +21750,7 @@ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") (float_extend:V2DF (vec_select:V2SF - (match_operand:V4SF 2 "register_operand" "xm") + (match_operand:V4SF 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 1)]))) (const_int 2)))] @@ -21257,10 +22026,20 @@ (define_insn "sse2_clrti" [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))] "TARGET_SSE2" - "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "sseiadd") +{ + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t%0, %0"; + else + return "xorps\t%0, %0"; +} + [(set_attr "type" "ssemov") (set_attr "memory" "none") - (set_attr "mode" "TI")]) + (set (attr "mode") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")))]) ;; MMX unsigned averages/sum of absolute differences @@ -21320,7 +22099,7 @@ (vec_duplicate:V8HI (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm"))) - (match_operand:SI 3 "immediate_operand" "i")))] + (match_operand:SI 3 "const_0_to_255_operand" "N")))] "TARGET_SSE2" "pinsrw\t{%3, %2, %0|%0, %2, %3}" [(set_attr "type" "ssecvt") @@ -21331,7 +22110,7 @@ (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "register_operand" "x") (parallel - [(match_operand:SI 2 "immediate_operand" "i")]))))] + [(match_operand:SI 2 "const_0_to_7_operand" "N")]))))] "TARGET_SSE2" "pextrw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "ssecvt") @@ -21468,7 +22247,7 @@ (define_insn "ashrv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:TI 2 "nonmemory_operand" "xi")))] + (match_operand:SI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psraw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21477,7 +22256,7 @@ (define_insn "ashrv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:TI 2 "nonmemory_operand" "xi")))] + (match_operand:SI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrad\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21486,7 +22265,7 @@ (define_insn "lshrv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:TI 2 "nonmemory_operand" "xi")))] + (match_operand:SI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrlw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21495,7 +22274,7 @@ (define_insn "lshrv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:TI 2 "nonmemory_operand" "xi")))] + (match_operand:SI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21504,7 +22283,7 @@ (define_insn "lshrv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:TI 2 "nonmemory_operand" "xi")))] + (match_operand:SI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrlq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21513,7 +22292,7 @@ (define_insn "ashlv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:TI 2 "nonmemory_operand" "xi")))] + (match_operand:SI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psllw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21522,7 +22301,7 @@ (define_insn "ashlv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:TI 2 "nonmemory_operand" "xi")))] + (match_operand:SI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "pslld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21531,7 +22310,7 @@ (define_insn "ashlv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:TI 2 "nonmemory_operand" "xi")))] + (match_operand:SI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psllq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21540,7 +22319,7 @@ (define_insn "ashrv8hi3_ti" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] "TARGET_SSE2" "psraw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21549,7 +22328,7 @@ (define_insn "ashrv4si3_ti" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] "TARGET_SSE2" "psrad\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21558,7 +22337,7 @@ (define_insn "lshrv8hi3_ti" [(set (match_operand:V8HI 0 "register_operand" "=x") (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] "TARGET_SSE2" "psrlw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21567,7 +22346,7 @@ (define_insn "lshrv4si3_ti" [(set (match_operand:V4SI 0 "register_operand" "=x") (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] "TARGET_SSE2" "psrld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21576,7 +22355,7 @@ (define_insn "lshrv2di3_ti" [(set (match_operand:V2DI 0 "register_operand" "=x") (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") - (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] "TARGET_SSE2" "psrlq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21585,7 +22364,7 @@ (define_insn "ashlv8hi3_ti" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") - (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] "TARGET_SSE2" "psllw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21594,7 +22373,7 @@ (define_insn "ashlv4si3_ti" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") - (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] "TARGET_SSE2" "pslld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21603,7 +22382,7 @@ (define_insn "ashlv2di3_ti" [(set (match_operand:V2DI 0 "register_operand" "=x") (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") - (subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] + (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))] "TARGET_SSE2" "psllq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21640,26 +22419,26 @@ (define_insn "sse2_unpckhpd" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_concat:V2DF - (vec_select:V2DF (match_operand:V2DF 1 "register_operand" "0") - (parallel [(const_int 1)])) - (vec_select:V2DF (match_operand:V2DF 2 "register_operand" "x") - (parallel [(const_int 0)]))))] + (vec_select:DF (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 1)])) + (vec_select:DF (match_operand:V2DF 2 "register_operand" "x") + (parallel [(const_int 1)]))))] "TARGET_SSE2" "unpckhpd\t{%2, %0|%0, %2}" [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) + (set_attr "mode" "V2DF")]) (define_insn "sse2_unpcklpd" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_concat:V2DF - (vec_select:V2DF (match_operand:V2DF 1 "register_operand" "0") - (parallel [(const_int 0)])) - (vec_select:V2DF (match_operand:V2DF 2 "register_operand" "x") - (parallel [(const_int 1)]))))] + (vec_select:DF (match_operand:V2DF 1 "register_operand" "0") + (parallel [(const_int 0)])) + (vec_select:DF (match_operand:V2DF 2 "register_operand" "x") + (parallel [(const_int 0)]))))] "TARGET_SSE2" "unpcklpd\t{%2, %0|%0, %2}" [(set_attr "type" "ssecvt") - (set_attr "mode" "TI")]) + (set_attr "mode" "V2DF")]) ;; MMX pack/unpack insns. @@ -21975,17 +22754,6 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) -(define_insn "sse2_movlpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (vec_merge:V2DF - (match_operand:V2DF 1 "nonimmediate_operand" "0,0") - (match_operand:V2DF 2 "nonimmediate_operand" "m,x") - (const_int 1)))] - "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" - "movlpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "V2DF")]) - (define_expand "sse2_loadsd" [(match_operand:V2DF 0 "register_operand" "") (match_operand:DF 1 "memory_operand" "")] @@ -22008,15 +22776,17 @@ (set_attr "mode" "DF")]) (define_insn "sse2_movsd" - [(set (match_operand:V2DF 0 "register_operand" "=x") + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") (vec_merge:V2DF - (match_operand:V2DF 1 "register_operand" "0") - (match_operand:V2DF 2 "register_operand" "x") + (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m,x") (const_int 1)))] - "TARGET_SSE2" - "movsd\t{%2, %0|%0, %2}" + "TARGET_SSE2 && ix86_binary_operator_ok (UNKNOWN, V2DFmode, operands)" + "@movsd\t{%2, %0|%0, %2} + movlpd\t{%2, %0|%0, %2} + movlpd\t{%2, %0|%0, %2}" [(set_attr "type" "ssecvt") - (set_attr "mode" "DF")]) + (set_attr "mode" "DF,V2DF,V2DF")]) (define_insn "sse2_storesd" [(set (match_operand:DF 0 "memory_operand" "=m") @@ -22082,13 +22852,13 @@ [(set_attr "type" "sse") (set_attr "memory" "unknown")]) -;; PNI +;; SSE3 (define_insn "mwait" [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") (match_operand:SI 1 "register_operand" "c")] UNSPECV_MWAIT)] - "TARGET_PNI" + "TARGET_SSE3" "mwait\t%0, %1" [(set_attr "length" "3")]) @@ -22097,18 +22867,18 @@ (match_operand:SI 1 "register_operand" "c") (match_operand:SI 2 "register_operand" "d")] UNSPECV_MONITOR)] - "TARGET_PNI" + "TARGET_SSE3" "monitor\t%0, %1, %2" [(set_attr "length" "3")]) -;; PNI arithmetic +;; SSE3 arithmetic (define_insn "addsubv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "nonimmediate_operand" "xm")] UNSPEC_ADDSUB))] - "TARGET_PNI" + "TARGET_SSE3" "addsubps\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") (set_attr "mode" "V4SF")]) @@ -22118,7 +22888,7 @@ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") (match_operand:V2DF 2 "nonimmediate_operand" "xm")] UNSPEC_ADDSUB))] - "TARGET_PNI" + "TARGET_SSE3" "addsubpd\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")]) @@ -22128,7 +22898,7 @@ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "nonimmediate_operand" "xm")] UNSPEC_HADD))] - "TARGET_PNI" + "TARGET_SSE3" "haddps\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") (set_attr "mode" "V4SF")]) @@ -22138,7 +22908,7 @@ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") (match_operand:V2DF 2 "nonimmediate_operand" "xm")] UNSPEC_HADD))] - "TARGET_PNI" + "TARGET_SSE3" "haddpd\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")]) @@ -22148,7 +22918,7 @@ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "nonimmediate_operand" "xm")] UNSPEC_HSUB))] - "TARGET_PNI" + "TARGET_SSE3" "hsubps\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") (set_attr "mode" "V4SF")]) @@ -22158,7 +22928,7 @@ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") (match_operand:V2DF 2 "nonimmediate_operand" "xm")] UNSPEC_HSUB))] - "TARGET_PNI" + "TARGET_SSE3" "hsubpd\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") (set_attr "mode" "V2DF")]) @@ -22167,7 +22937,7 @@ [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))] - "TARGET_PNI" + "TARGET_SSE3" "movshdup\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "mode" "V4SF")]) @@ -22176,7 +22946,7 @@ [(set (match_operand:V4SF 0 "register_operand" "=x") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))] - "TARGET_PNI" + "TARGET_SSE3" "movsldup\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "mode" "V4SF")]) @@ -22185,7 +22955,7 @@ [(set (match_operand:V16QI 0 "register_operand" "=x") (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] UNSPEC_LDQQU))] - "TARGET_PNI" + "TARGET_SSE3" "lddqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) @@ -22193,7 +22963,7 @@ (define_insn "loadddup" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))] - "TARGET_PNI" + "TARGET_SSE3" "movddup\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) @@ -22203,7 +22973,7 @@ (vec_duplicate:V2DF (vec_select:DF (match_operand:V2DF 1 "register_operand" "x") (parallel [(const_int 0)]))))] - "TARGET_PNI" + "TARGET_SSE3" "movddup\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) |