summaryrefslogtreecommitdiffstats
path: root/contrib/gcc/config/i386/i386.md
diff options
context:
space:
mode:
authorpfg <pfg@FreeBSD.org>2013-06-01 01:02:24 +0000
committerpfg <pfg@FreeBSD.org>2013-06-01 01:02:24 +0000
commitcd8fbd7550760be2e4709d4855f6933f39bdc10f (patch)
tree3549d0ddfd14aee8437b42854867259626cb26fe /contrib/gcc/config/i386/i386.md
parent772c877e7b062cfc4d7136ef851c2eccae56df97 (diff)
downloadFreeBSD-src-cd8fbd7550760be2e4709d4855f6933f39bdc10f.zip
FreeBSD-src-cd8fbd7550760be2e4709d4855f6933f39bdc10f.tar.gz
GCC: bring back experimental support for amdfam10/barcelona CPUs.
Initial support for the AMD amdfam10 chipsets has been available in the gcc43 branch under GPLv2. AMD and some linux distributions (OpenSUSE) did a backport of the amdfam10 support and made it available. This is a revised subset of the support initially brought in in r236962 and later reverted. The collateral efects seem to have disappeared but it is still recommended to set the CPUTYPE with caution. Reviewed by: jkim (ages ago) MFC after: 3 weeks
Diffstat (limited to 'contrib/gcc/config/i386/i386.md')
-rw-r--r--contrib/gcc/config/i386/i386.md352
1 files changed, 306 insertions, 46 deletions
diff --git a/contrib/gcc/config/i386/i386.md b/contrib/gcc/config/i386/i386.md
index bd81c4a..21b9cb3 100644
--- a/contrib/gcc/config/i386/i386.md
+++ b/contrib/gcc/config/i386/i386.md
@@ -153,6 +153,12 @@
(UNSPEC_PSHUFB 120)
(UNSPEC_PSIGN 121)
(UNSPEC_PALIGNR 122)
+
+ ; For SSE4A support
+ (UNSPEC_EXTRQI 130)
+ (UNSPEC_EXTRQ 131)
+ (UNSPEC_INSERTQI 132)
+ (UNSPEC_INSERTQ 133)
])
(define_constants
@@ -192,7 +198,8 @@
;; Processor type. This attribute must exactly match the processor_type
;; enumeration in i386.h.
-(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64"
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,
+ nocona,core2,generic32,generic64,amdfam10"
(const (symbol_ref "ix86_tune")))
;; A basic instruction type. Refinements due to arguments to be
@@ -203,10 +210,10 @@
incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
icmp,test,ibr,setcc,icmov,
push,pop,call,callv,leave,
- str,cld,
+ str,bitmanip,cld,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
sselog,sselog1,sseiadd,sseishft,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
@@ -220,7 +227,7 @@
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
(const_string "i387")
(eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv")
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
@@ -230,7 +237,8 @@
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
- (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave")
+ (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave,
+ bitmanip")
(const_int 0)
(eq_attr "unit" "i387,sse,mmx")
(const_int 0)
@@ -284,7 +292,7 @@
;; Set when 0f opcode prefix is used.
(define_attr "prefix_0f" ""
(if_then_else
- (ior (eq_attr "type" "imovx,setcc,icmov")
+ (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
(eq_attr "unit" "sse,mmx"))
(const_int 1)
(const_int 0)))
@@ -413,7 +421,7 @@
(const_string "load")
(and (eq_attr "type"
"!alu1,negnot,ishift1,
- imov,imovx,icmp,test,
+ imov,imovx,icmp,test,bitmanip,
fmov,fcmp,fsgn,
sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1,
mmx,mmxmov,mmxcmp,mmxcvt")
@@ -968,10 +976,11 @@
"sahf"
[(set_attr "length" "1")
(set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "SI")])
;; Pentium Pro can do steps 1 through 3 in one go.
-
+;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes)
(define_insn "*cmpfp_i_mixed"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP (match_operand 0 "register_operand" "f,x")
@@ -985,7 +994,8 @@
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_i_sse"
[(set (reg:CCFP FLAGS_REG)
@@ -1000,7 +1010,8 @@
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_i_i387"
[(set (reg:CCFP FLAGS_REG)
@@ -1019,7 +1030,8 @@
(const_string "DF")
]
(const_string "XF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_iu_mixed"
[(set (reg:CCFPU FLAGS_REG)
@@ -1034,7 +1046,8 @@
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_iu_sse"
[(set (reg:CCFPU FLAGS_REG)
@@ -1049,7 +1062,8 @@
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_iu_387"
[(set (reg:CCFPU FLAGS_REG)
@@ -1068,7 +1082,8 @@
(const_string "DF")
]
(const_string "XF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
;; Move instructions.
@@ -1274,7 +1289,8 @@
[(set_attr "type" "imov")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
(define_expand "movhi"
[(set (match_operand:HI 0 "nonimmediate_operand" "")
@@ -1391,8 +1407,10 @@
[(set_attr "type" "imov")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
+;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
(define_insn "*swaphi_2"
[(set (match_operand:HI 0 "register_operand" "+r")
(match_operand:HI 1 "register_operand" "+r"))
@@ -1565,8 +1583,10 @@
[(set_attr "type" "imov")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
+;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
(define_insn "*swapqi_2"
[(set (match_operand:QI 0 "register_operand" "+q")
(match_operand:QI 1 "register_operand" "+q"))
@@ -2120,7 +2140,8 @@
[(set_attr "type" "imov")
(set_attr "mode" "DI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
(define_expand "movti"
[(set (match_operand:TI 0 "nonimmediate_operand" "")
@@ -4150,7 +4171,8 @@
"cvttss2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "fix_truncdfdi_sse"
[(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -4159,7 +4181,8 @@
"cvttsd2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "fix_truncsfsi_sse"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -4168,7 +4191,8 @@
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "fix_truncdfsi_sse"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -4177,7 +4201,8 @@
"cvttsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
;; Avoid vector decoded forms of the instruction.
(define_peephole2
@@ -4438,7 +4463,8 @@
[(set_attr "length" "2")
(set_attr "mode" "HI")
(set_attr "unit" "i387")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
;; Conversion between fixed point and floating point.
@@ -4489,6 +4515,7 @@
(set_attr "mode" "SF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,vector,double")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsisf2_sse"
@@ -4499,6 +4526,7 @@
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsisf2_i387"
@@ -4532,6 +4560,7 @@
(set_attr "mode" "SF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,vector,double")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_sse"
@@ -4542,6 +4571,7 @@
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_i387"
@@ -4600,6 +4630,7 @@
(set_attr "mode" "DF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsidf2_sse"
@@ -4610,6 +4641,7 @@
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsidf2_i387"
@@ -4643,6 +4675,7 @@
(set_attr "mode" "DF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_sse"
@@ -4653,6 +4686,7 @@
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_i387"
@@ -6860,6 +6894,14 @@
"TARGET_64BIT"
"")
+;; On AMDFAM10
+;; IMUL reg64, reg64, imm8 Direct
+;; IMUL reg64, mem64, imm8 VectorPath
+;; IMUL reg64, reg64, imm32 Direct
+;; IMUL reg64, mem64, imm32 VectorPath
+;; IMUL reg64, reg64 Direct
+;; IMUL reg64, mem64 Direct
+
(define_insn "*muldi3_1_rex64"
[(set (match_operand:DI 0 "register_operand" "=r,r,r")
(mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
@@ -6882,6 +6924,11 @@
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "DI")])
(define_expand "mulsi3"
@@ -6892,6 +6939,14 @@
""
"")
+;; On AMDFAM10
+;; IMUL reg32, reg32, imm8 Direct
+;; IMUL reg32, mem32, imm8 VectorPath
+;; IMUL reg32, reg32, imm32 Direct
+;; IMUL reg32, mem32, imm32 VectorPath
+;; IMUL reg32, reg32 Direct
+;; IMUL reg32, mem32 Direct
+
(define_insn "*mulsi3_1"
[(set (match_operand:SI 0 "register_operand" "=r,r,r")
(mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
@@ -6913,6 +6968,11 @@
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "SI")])
(define_insn "*mulsi3_1_zext"
@@ -6938,6 +6998,11 @@
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "SI")])
(define_expand "mulhi3"
@@ -6948,6 +7013,13 @@
"TARGET_HIMODE_MATH"
"")
+;; On AMDFAM10
+;; IMUL reg16, reg16, imm8 VectorPath
+;; IMUL reg16, mem16, imm8 VectorPath
+;; IMUL reg16, reg16, imm16 VectorPath
+;; IMUL reg16, mem16, imm16 VectorPath
+;; IMUL reg16, reg16 Direct
+;; IMUL reg16, mem16 Direct
(define_insn "*mulhi3_1"
[(set (match_operand:HI 0 "register_operand" "=r,r,r")
(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
@@ -6966,6 +7038,10 @@
(eq_attr "alternative" "1,2")
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(eq_attr "alternative" "0,1")
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "HI")])
(define_expand "mulqi3"
@@ -6976,6 +7052,10 @@
"TARGET_QIMODE_MATH"
"")
+;;On AMDFAM10
+;; MUL reg8 Direct
+;; MUL mem8 Direct
+
(define_insn "*mulqi3_1"
[(set (match_operand:QI 0 "register_operand" "=a")
(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
@@ -6990,6 +7070,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "umulqihi3"
@@ -7016,6 +7097,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "mulqihi3"
@@ -7040,6 +7122,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "umulditi3"
@@ -7066,6 +7149,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers
@@ -7093,6 +7177,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "mulditi3"
@@ -7119,6 +7204,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "mulsidi3"
@@ -7145,6 +7231,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "umuldi3_highpart"
@@ -7181,6 +7268,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "umulsi3_highpart"
@@ -7216,6 +7304,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_insn "*umulsi3_highpart_zext"
@@ -7238,6 +7327,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "smuldi3_highpart"
@@ -7273,6 +7363,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "smulsi3_highpart"
@@ -7307,6 +7398,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_insn "*smulsi3_highpart_zext"
@@ -7328,6 +7420,7 @@
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
;; The patterns that match these are at the end of this file.
@@ -10309,7 +10402,8 @@
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "x86_64_shift_adj"
[(set (reg:CCZ FLAGS_REG)
@@ -10524,7 +10618,8 @@
(set_attr "prefix_0f" "1")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "x86_shift_adj_1"
[(set (reg:CCZ FLAGS_REG)
@@ -11284,7 +11379,8 @@
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "ashrdi3"
[(set (match_operand:DI 0 "shiftdi_operand" "")
@@ -14558,7 +14654,23 @@
[(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
(clobber (reg:CC FLAGS_REG))])]
""
- "")
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "clzsi2_abm"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ABM"
+ "lzcnt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
(define_insn "*bsr"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -14567,7 +14679,44 @@
(clobber (reg:CC FLAGS_REG))]
""
"bsr{l}\t{%1, %0|%0, %1}"
- [(set_attr "prefix_0f" "1")])
+ [(set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")])
+
+(define_insn "popcountsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
+ "popcnt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_insn "*popcountsi2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (popcount:SI (match_dup 1)))]
+ "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+ "popcnt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_insn "*popcountsi2_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI(popcount:SI (match_dup 1))))]
+ "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+ "popcnt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
(define_expand "clzdi2"
[(parallel
@@ -14579,7 +14728,23 @@
[(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT"
- "")
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "clzdi2_abm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_ABM"
+ "lzcnt{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "DI")])
(define_insn "*bsr_rex64"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -14588,7 +14753,92 @@
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT"
"bsr{q}\t{%1, %0|%0, %1}"
- [(set_attr "prefix_0f" "1")])
+ [(set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "popcountdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_POPCNT"
+ "popcnt{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "DI")])
+
+(define_insn "*popcountdi2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (popcount:DI (match_dup 1)))]
+ "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+ "popcnt{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "DI")])
+
+(define_expand "clzhi2"
+ [(parallel
+ [(set (match_operand:HI 0 "register_operand" "")
+ (minus:HI (const_int 15)
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzhi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "clzhi2_abm"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ABM"
+ "lzcnt{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "HI")])
+
+(define_insn "*bsrhi"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (minus:HI (const_int 15)
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "bsr{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")
+ (set_attr "mode" "HI")])
+
+(define_insn "popcounthi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
+ "popcnt{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "HI")])
+
+(define_insn "*popcounthi2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "register_operand" "=r")
+ (popcount:HI (match_dup 1)))]
+ "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+ "popcnt{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "HI")])
;; Thread-local storage patterns for ELF.
;;
@@ -15494,7 +15744,8 @@
sqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "fpspc,sse")
(set_attr "mode" "SF,SF")
- (set_attr "athlon_decode" "direct,*")])
+ (set_attr "athlon_decode" "direct,*")
+ (set_attr "amdfam10_decode" "direct,*")])
(define_insn "*sqrtsf2_sse"
[(set (match_operand:SF 0 "register_operand" "=x")
@@ -15503,7 +15754,8 @@
"sqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "SF")
- (set_attr "athlon_decode" "*")])
+ (set_attr "athlon_decode" "*")
+ (set_attr "amdfam10_decode" "*")])
(define_insn "*sqrtsf2_i387"
[(set (match_operand:SF 0 "register_operand" "=f")
@@ -15512,7 +15764,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "SF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_expand "sqrtdf2"
[(set (match_operand:DF 0 "register_operand" "")
@@ -15532,7 +15785,8 @@
sqrtsd\t{%1, %0|%0, %1}"
[(set_attr "type" "fpspc,sse")
(set_attr "mode" "DF,DF")
- (set_attr "athlon_decode" "direct,*")])
+ (set_attr "athlon_decode" "direct,*")
+ (set_attr "amdfam10_decode" "direct,*")])
(define_insn "*sqrtdf2_sse"
[(set (match_operand:DF 0 "register_operand" "=Y")
@@ -15541,7 +15795,8 @@
"sqrtsd\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "*")])
+ (set_attr "athlon_decode" "*")
+ (set_attr "amdfam10_decode" "*")])
(define_insn "*sqrtdf2_i387"
[(set (match_operand:DF 0 "register_operand" "=f")
@@ -15550,7 +15805,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*sqrtextendsfdf2_i387"
[(set (match_operand:DF 0 "register_operand" "=f")
@@ -15561,7 +15817,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "sqrtxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -15570,7 +15827,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*sqrtextendsfxf2_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -15580,7 +15838,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*sqrtextenddfxf2_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -15590,7 +15849,8 @@
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "fpremxf4"
[(set (match_operand:XF 0 "register_operand" "=f")
@@ -20391,7 +20651,7 @@
(mult:DI (match_operand:DI 1 "memory_operand" "")
(match_operand:DI 2 "immediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& !satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 1))
(parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
@@ -20404,7 +20664,7 @@
(mult:SI (match_operand:SI 1 "memory_operand" "")
(match_operand:SI 2 "immediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& !satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 1))
(parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
@@ -20418,7 +20678,7 @@
(mult:SI (match_operand:SI 1 "memory_operand" "")
(match_operand:SI 2 "immediate_operand" ""))))
(clobber (reg:CC FLAGS_REG))])]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& !satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 1))
(parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
@@ -20435,7 +20695,7 @@
(match_operand:DI 2 "const_int_operand" "")))
(clobber (reg:CC FLAGS_REG))])
(match_scratch:DI 3 "r")]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 2))
(parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
@@ -20451,7 +20711,7 @@
(match_operand:SI 2 "const_int_operand" "")))
(clobber (reg:CC FLAGS_REG))])
(match_scratch:SI 3 "r")]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 2))
(parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
@@ -20467,7 +20727,7 @@
(match_operand:HI 2 "immediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])
(match_scratch:HI 3 "r")]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size"
[(set (match_dup 3) (match_dup 2))
(parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
(clobber (reg:CC FLAGS_REG))])]
OpenPOWER on IntegriCloud