diff options
author | pfg <pfg@FreeBSD.org> | 2013-06-01 01:02:24 +0000 |
---|---|---|
committer | pfg <pfg@FreeBSD.org> | 2013-06-01 01:02:24 +0000 |
commit | cd8fbd7550760be2e4709d4855f6933f39bdc10f (patch) | |
tree | 3549d0ddfd14aee8437b42854867259626cb26fe /contrib/gcc/config/i386/i386.md | |
parent | 772c877e7b062cfc4d7136ef851c2eccae56df97 (diff) | |
download | FreeBSD-src-cd8fbd7550760be2e4709d4855f6933f39bdc10f.zip FreeBSD-src-cd8fbd7550760be2e4709d4855f6933f39bdc10f.tar.gz |
GCC: bring back experimental support for amdfam10/barcelona CPUs.
Initial support for the AMD amdfam10 chipsets has been available in the
gcc43 branch under GPLv2. AMD and some Linux distributions (OpenSUSE) did
a backport of the amdfam10 support and made it available.
This is a revised subset of the support initially brought in in r236962
and later reverted. The collateral effects seem to have disappeared but
it is still recommended to set the CPUTYPE with caution.
Reviewed by: jkim (ages ago)
MFC after: 3 weeks
Diffstat (limited to 'contrib/gcc/config/i386/i386.md')
-rw-r--r-- | contrib/gcc/config/i386/i386.md | 352 |
1 files changed, 306 insertions, 46 deletions
diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md index bd81c4a..21b9cb3 100644 --- a/contrib/gcc/config/i386/i386.md +++ b/contrib/gcc/config/i386/i386.md @@ -153,6 +153,12 @@ (UNSPEC_PSHUFB 120) (UNSPEC_PSIGN 121) (UNSPEC_PALIGNR 122) + + ; For SSE4A support + (UNSPEC_EXTRQI 130) + (UNSPEC_EXTRQ 131) + (UNSPEC_INSERTQI 132) + (UNSPEC_INSERTQ 133) ]) (define_constants @@ -192,7 +198,8 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in i386.h. -(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64" +(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8, + nocona,core2,generic32,generic64,amdfam10" (const (symbol_ref "ix86_tune"))) ;; A basic instruction type. Refinements due to arguments to be @@ -203,10 +210,10 @@ incdec,ishift,ishift1,rotate,rotate1,imul,idiv, icmp,test,ibr,setcc,icmov, push,pop,call,callv,leave, - str,cld, + str,bitmanip,cld, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, sselog,sselog1,sseiadd,sseishft,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" (const_string "other")) @@ -220,7 +227,7 @@ (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") (const_string "i387") (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul, - sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv") + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") (const_string "mmx") @@ -230,7 +237,8 @@ ;; The (bounding maximum) length of an instruction immediate. 
(define_attr "length_immediate" "" - (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave") + (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave, + bitmanip") (const_int 0) (eq_attr "unit" "i387,sse,mmx") (const_int 0) @@ -284,7 +292,7 @@ ;; Set when 0f opcode prefix is used. (define_attr "prefix_0f" "" (if_then_else - (ior (eq_attr "type" "imovx,setcc,icmov") + (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip") (eq_attr "unit" "sse,mmx")) (const_int 1) (const_int 0))) @@ -413,7 +421,7 @@ (const_string "load") (and (eq_attr "type" "!alu1,negnot,ishift1, - imov,imovx,icmp,test, + imov,imovx,icmp,test,bitmanip, fmov,fcmp,fsgn, sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1, mmx,mmxmov,mmxcmp,mmxcvt") @@ -968,10 +976,11 @@ "sahf" [(set_attr "length" "1") (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "SI")]) ;; Pentium Pro can do steps 1 through 3 in one go. - +;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes) (define_insn "*cmpfp_i_mixed" [(set (reg:CCFP FLAGS_REG) (compare:CCFP (match_operand 0 "register_operand" "f,x") @@ -985,7 +994,8 @@ (if_then_else (match_operand:SF 1 "" "") (const_string "SF") (const_string "DF"))) - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) (define_insn "*cmpfp_i_sse" [(set (reg:CCFP FLAGS_REG) @@ -1000,7 +1010,8 @@ (if_then_else (match_operand:SF 1 "" "") (const_string "SF") (const_string "DF"))) - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) (define_insn "*cmpfp_i_i387" [(set (reg:CCFP FLAGS_REG) @@ -1019,7 +1030,8 @@ (const_string "DF") ] (const_string "XF"))) - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) (define_insn "*cmpfp_iu_mixed" [(set (reg:CCFPU FLAGS_REG) @@ -1034,7 +1046,8 @@ 
(if_then_else (match_operand:SF 1 "" "") (const_string "SF") (const_string "DF"))) - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) (define_insn "*cmpfp_iu_sse" [(set (reg:CCFPU FLAGS_REG) @@ -1049,7 +1062,8 @@ (if_then_else (match_operand:SF 1 "" "") (const_string "SF") (const_string "DF"))) - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) (define_insn "*cmpfp_iu_387" [(set (reg:CCFPU FLAGS_REG) @@ -1068,7 +1082,8 @@ (const_string "DF") ] (const_string "XF"))) - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) ;; Move instructions. @@ -1274,7 +1289,8 @@ [(set_attr "type" "imov") (set_attr "mode" "SI") (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double")]) (define_expand "movhi" [(set (match_operand:HI 0 "nonimmediate_operand" "") @@ -1391,8 +1407,10 @@ [(set_attr "type" "imov") (set_attr "mode" "SI") (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double")]) +;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 (define_insn "*swaphi_2" [(set (match_operand:HI 0 "register_operand" "+r") (match_operand:HI 1 "register_operand" "+r")) @@ -1565,8 +1583,10 @@ [(set_attr "type" "imov") (set_attr "mode" "SI") (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) +;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 (define_insn "*swapqi_2" [(set (match_operand:QI 0 "register_operand" "+q") (match_operand:QI 1 "register_operand" "+q")) @@ -2120,7 +2140,8 @@ [(set_attr "type" "imov") (set_attr "mode" "DI") (set_attr "pent_pair" "np") 
- (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double")]) (define_expand "movti" [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -4150,7 +4171,8 @@ "cvttss2si{q}\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "SF") - (set_attr "athlon_decode" "double,vector")]) + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double")]) (define_insn "fix_truncdfdi_sse" [(set (match_operand:DI 0 "register_operand" "=r,r") @@ -4159,7 +4181,8 @@ "cvttsd2si{q}\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,vector")]) + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double")]) (define_insn "fix_truncsfsi_sse" [(set (match_operand:SI 0 "register_operand" "=r,r") @@ -4168,7 +4191,8 @@ "cvttss2si\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,vector")]) + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double")]) (define_insn "fix_truncdfsi_sse" [(set (match_operand:SI 0 "register_operand" "=r,r") @@ -4177,7 +4201,8 @@ "cvttsd2si\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,vector")]) + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double")]) ;; Avoid vector decoded forms of the instruction. (define_peephole2 @@ -4438,7 +4463,8 @@ [(set_attr "length" "2") (set_attr "mode" "HI") (set_attr "unit" "i387") - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) ;; Conversion between fixed point and floating point. 
@@ -4489,6 +4515,7 @@ (set_attr "mode" "SF") (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,vector,double") + (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatsisf2_sse" @@ -4499,6 +4526,7 @@ [(set_attr "type" "sseicvt") (set_attr "mode" "SF") (set_attr "athlon_decode" "vector,double") + (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatsisf2_i387" @@ -4532,6 +4560,7 @@ (set_attr "mode" "SF") (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,vector,double") + (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatdisf2_sse" @@ -4542,6 +4571,7 @@ [(set_attr "type" "sseicvt") (set_attr "mode" "SF") (set_attr "athlon_decode" "vector,double") + (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatdisf2_i387" @@ -4600,6 +4630,7 @@ (set_attr "mode" "DF") (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,double,direct") + (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatsidf2_sse" @@ -4610,6 +4641,7 @@ [(set_attr "type" "sseicvt") (set_attr "mode" "DF") (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatsidf2_i387" @@ -4643,6 +4675,7 @@ (set_attr "mode" "DF") (set_attr "unit" "*,i387,*,*") (set_attr "athlon_decode" "*,*,double,direct") + (set_attr "amdfam10_decode" "*,*,vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatdidf2_sse" @@ -4653,6 +4686,7 @@ [(set_attr "type" "sseicvt") (set_attr "mode" "DF") (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") (set_attr "fp_int_src" "true")]) (define_insn "*floatdidf2_i387" @@ -6860,6 +6894,14 @@ "TARGET_64BIT" "") +;; On AMDFAM10 +;; IMUL reg64, reg64, imm8 Direct +;; IMUL reg64, mem64, imm8 VectorPath 
+;; IMUL reg64, reg64, imm32 Direct +;; IMUL reg64, mem64, imm32 VectorPath +;; IMUL reg64, reg64 Direct +;; IMUL reg64, mem64 Direct + (define_insn "*muldi3_1_rex64" [(set (match_operand:DI 0 "register_operand" "=r,r,r") (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0") @@ -6882,6 +6924,11 @@ (match_operand 1 "memory_operand" "")) (const_string "vector")] (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "DI")]) (define_expand "mulsi3" @@ -6892,6 +6939,14 @@ "" "") +;; On AMDFAM10 +;; IMUL reg32, reg32, imm8 Direct +;; IMUL reg32, mem32, imm8 VectorPath +;; IMUL reg32, reg32, imm32 Direct +;; IMUL reg32, mem32, imm32 VectorPath +;; IMUL reg32, reg32 Direct +;; IMUL reg32, mem32 Direct + (define_insn "*mulsi3_1" [(set (match_operand:SI 0 "register_operand" "=r,r,r") (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") @@ -6913,6 +6968,11 @@ (match_operand 1 "memory_operand" "")) (const_string "vector")] (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "SI")]) (define_insn "*mulsi3_1_zext" @@ -6938,6 +6998,11 @@ (match_operand 1 "memory_operand" "")) (const_string "vector")] (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "SI")]) (define_expand "mulhi3" @@ -6948,6 +7013,13 @@ "TARGET_HIMODE_MATH" "") +;; On AMDFAM10 +;; IMUL reg16, reg16, imm8 VectorPath +;; IMUL reg16, mem16, imm8 VectorPath +;; IMUL reg16, reg16, imm16 VectorPath +;; IMUL reg16, mem16, imm16 VectorPath +;; IMUL reg16, reg16 Direct +;; IMUL reg16, mem16 Direct (define_insn "*mulhi3_1" 
[(set (match_operand:HI 0 "register_operand" "=r,r,r") (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0") @@ -6966,6 +7038,10 @@ (eq_attr "alternative" "1,2") (const_string "vector")] (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(eq_attr "alternative" "0,1") + (const_string "vector")] + (const_string "direct"))) (set_attr "mode" "HI")]) (define_expand "mulqi3" @@ -6976,6 +7052,10 @@ "TARGET_QIMODE_MATH" "") +;;On AMDFAM10 +;; MUL reg8 Direct +;; MUL mem8 Direct + (define_insn "*mulqi3_1" [(set (match_operand:QI 0 "register_operand" "=a") (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0") @@ -6990,6 +7070,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "umulqihi3" @@ -7016,6 +7097,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "mulqihi3" @@ -7040,6 +7122,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") (set_attr "mode" "QI")]) (define_expand "umulditi3" @@ -7066,6 +7149,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) ;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers @@ -7093,6 +7177,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "mulditi3" @@ -7119,6 +7204,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) (define_expand "mulsidi3" @@ -7145,6 +7231,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string 
"vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "umuldi3_highpart" @@ -7181,6 +7268,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) (define_expand "umulsi3_highpart" @@ -7216,6 +7304,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_insn "*umulsi3_highpart_zext" @@ -7238,6 +7327,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_expand "smuldi3_highpart" @@ -7273,6 +7363,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "DI")]) (define_expand "smulsi3_highpart" @@ -7307,6 +7398,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) (define_insn "*smulsi3_highpart_zext" @@ -7328,6 +7420,7 @@ (if_then_else (eq_attr "cpu" "athlon") (const_string "vector") (const_string "double"))) + (set_attr "amdfam10_decode" "double") (set_attr "mode" "SI")]) ;; The patterns that match these are at the end of this file. 
@@ -10309,7 +10402,8 @@ [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) (define_expand "x86_64_shift_adj" [(set (reg:CCZ FLAGS_REG) @@ -10524,7 +10618,8 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "SI") (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) (define_expand "x86_shift_adj_1" [(set (reg:CCZ FLAGS_REG) @@ -11284,7 +11379,8 @@ [(set_attr "type" "ishift") (set_attr "prefix_0f" "1") (set_attr "mode" "DI") - (set_attr "athlon_decode" "vector")]) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) (define_expand "ashrdi3" [(set (match_operand:DI 0 "shiftdi_operand" "") @@ -14558,7 +14654,23 @@ [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31))) (clobber (reg:CC FLAGS_REG))])] "" - "") +{ + if (TARGET_ABM) + { + emit_insn (gen_clzsi2_abm (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "clzsi2_abm" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ABM" + "lzcnt{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) (define_insn "*bsr" [(set (match_operand:SI 0 "register_operand" "=r") @@ -14567,7 +14679,44 @@ (clobber (reg:CC FLAGS_REG))] "" "bsr{l}\t{%1, %0|%0, %1}" - [(set_attr "prefix_0f" "1")]) + [(set_attr "prefix_0f" "1") + (set_attr "mode" "SI")]) + +(define_insn "popcountsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (popcount:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" + "popcnt{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + +(define_insn "*popcountsi2_cmp" + [(set (reg 
FLAGS_REG) + (compare + (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (popcount:SI (match_dup 1)))] + "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" + "popcnt{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + +(define_insn "*popcountsi2_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI(popcount:SI (match_dup 1))))] + "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" + "popcnt{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) (define_expand "clzdi2" [(parallel @@ -14579,7 +14728,23 @@ [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63))) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT" - "") +{ + if (TARGET_ABM) + { + emit_insn (gen_clzdi2_abm (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "clzdi2_abm" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_ABM" + "lzcnt{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "DI")]) (define_insn "*bsr_rex64" [(set (match_operand:DI 0 "register_operand" "=r") @@ -14588,7 +14753,92 @@ (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" "bsr{q}\t{%1, %0|%0, %1}" - [(set_attr "prefix_0f" "1")]) + [(set_attr "prefix_0f" "1") + (set_attr "mode" "DI")]) + +(define_insn "popcountdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (popcount:DI (match_operand:DI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_POPCNT" + "popcnt{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" 
"DI")]) + +(define_insn "*popcountdi2_cmp" + [(set (reg FLAGS_REG) + (compare + (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (popcount:DI (match_dup 1)))] + "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" + "popcnt{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "DI")]) + +(define_expand "clzhi2" + [(parallel + [(set (match_operand:HI 0 "register_operand" "") + (minus:HI (const_int 15) + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (TARGET_ABM) + { + emit_insn (gen_clzhi2_abm (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "clzhi2_abm" + [(set (match_operand:HI 0 "register_operand" "=r") + (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ABM" + "lzcnt{w}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "HI")]) + +(define_insn "*bsrhi" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (const_int 15) + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{w}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "mode" "HI")]) + +(define_insn "popcounthi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (popcount:HI (match_operand:HI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" + "popcnt{w}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "HI")]) + +(define_insn "*popcounthi2_cmp" + [(set (reg FLAGS_REG) + (compare + (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:HI 0 "register_operand" "=r") + (popcount:HI 
(match_dup 1)))] + "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" + "popcnt{w}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "HI")]) ;; Thread-local storage patterns for ELF. ;; @@ -15494,7 +15744,8 @@ sqrtss\t{%1, %0|%0, %1}" [(set_attr "type" "fpspc,sse") (set_attr "mode" "SF,SF") - (set_attr "athlon_decode" "direct,*")]) + (set_attr "athlon_decode" "direct,*") + (set_attr "amdfam10_decode" "direct,*")]) (define_insn "*sqrtsf2_sse" [(set (match_operand:SF 0 "register_operand" "=x") @@ -15503,7 +15754,8 @@ "sqrtss\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "mode" "SF") - (set_attr "athlon_decode" "*")]) + (set_attr "athlon_decode" "*") + (set_attr "amdfam10_decode" "*")]) (define_insn "*sqrtsf2_i387" [(set (match_operand:SF 0 "register_operand" "=f") @@ -15512,7 +15764,8 @@ "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "SF") - (set_attr "athlon_decode" "direct")]) + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) (define_expand "sqrtdf2" [(set (match_operand:DF 0 "register_operand" "") @@ -15532,7 +15785,8 @@ sqrtsd\t{%1, %0|%0, %1}" [(set_attr "type" "fpspc,sse") (set_attr "mode" "DF,DF") - (set_attr "athlon_decode" "direct,*")]) + (set_attr "athlon_decode" "direct,*") + (set_attr "amdfam10_decode" "direct,*")]) (define_insn "*sqrtdf2_sse" [(set (match_operand:DF 0 "register_operand" "=Y") @@ -15541,7 +15795,8 @@ "sqrtsd\t{%1, %0|%0, %1}" [(set_attr "type" "sse") (set_attr "mode" "DF") - (set_attr "athlon_decode" "*")]) + (set_attr "athlon_decode" "*") + (set_attr "amdfam10_decode" "*")]) (define_insn "*sqrtdf2_i387" [(set (match_operand:DF 0 "register_operand" "=f") @@ -15550,7 +15805,8 @@ "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "DF") - (set_attr "athlon_decode" "direct")]) + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) (define_insn "*sqrtextendsfdf2_i387" [(set (match_operand:DF 0 "register_operand" "=f") @@ 
-15561,7 +15817,8 @@ "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "DF") - (set_attr "athlon_decode" "direct")]) + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) (define_insn "sqrtxf2" [(set (match_operand:XF 0 "register_operand" "=f") @@ -15570,7 +15827,8 @@ "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) (define_insn "*sqrtextendsfxf2_i387" [(set (match_operand:XF 0 "register_operand" "=f") @@ -15580,7 +15838,8 @@ "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) (define_insn "*sqrtextenddfxf2_i387" [(set (match_operand:XF 0 "register_operand" "=f") @@ -15590,7 +15849,8 @@ "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") - (set_attr "athlon_decode" "direct")]) + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) (define_insn "fpremxf4" [(set (match_operand:XF 0 "register_operand" "=f") @@ -20391,7 +20651,7 @@ (mult:DI (match_operand:DI 1 "memory_operand" "") (match_operand:DI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2))) @@ -20404,7 +20664,7 @@ (mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] - "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2))) @@ -20418,7 +20678,7 @@ 
(mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" "")))) (clobber (reg:CC FLAGS_REG))])] - "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) @@ -20435,7 +20695,7 @@ (match_operand:DI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:DI 3 "r")] - "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) @@ -20451,7 +20711,7 @@ (match_operand:SI 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:SI 3 "r")] - "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size + "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) @@ -20467,7 +20727,7 @@ (match_operand:HI 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))]) (match_scratch:HI 3 "r")] - "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" + "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size" [(set (match_dup 3) (match_dup 2)) (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) (clobber (reg:CC FLAGS_REG))])] |