summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2017-10-21 12:07:16 -0300
committer James Almer <jamrial@gmail.com> 2017-10-21 12:15:57 -0300
commit 2904db90458a1253e4aea6844ba9a59ac11923b6 (patch)
tree b8410cd30199ff45585fce360fe6ec75a5fdcca3
parent ede5ddb58683b1186271bf2144843adaff3390c9 (diff)
parent 994c4bc10751e39c7ed9f67ffd0c0dea5223daf2 (diff)
download ffmpeg-streaming-2904db90458a1253e4aea6844ba9a59ac11923b6.zip
ffmpeg-streaming-2904db90458a1253e4aea6844ba9a59ac11923b6.tar.gz
Merge commit '994c4bc10751e39c7ed9f67ffd0c0dea5223daf2'
* commit '994c4bc10751e39c7ed9f67ffd0c0dea5223daf2':
  x86util: Port all macros to cpuflags

See d5f8a642f6eb1c6e305c41dabddd0fd36ffb3f77

Merged-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavcodec/x86/audiodsp.asm 5
-rw-r--r-- libavfilter/x86/yadif-16.asm 24
-rw-r--r-- libavutil/x86/x86util.asm 42
-rw-r--r-- libswscale/x86/scale.asm 10
4 files changed, 28 insertions, 53 deletions
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 3973808..de395e5 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -62,7 +62,7 @@ SCALARPRODUCT
; %1 = number of xmm registers used
; %2 = number of inline load/process/store loops per asm loop
; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
-; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
+; %4 = CLIPD function takes min/max as float instead of int (SSE2 version)
; %5 = suffix
%macro VECTOR_CLIP_INT32 4-5
cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
@@ -118,14 +118,11 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
%endmacro
INIT_MMX mmx
-%define CLIPD CLIPD_MMX
VECTOR_CLIP_INT32 0, 1, 0, 0
INIT_XMM sse2
VECTOR_CLIP_INT32 6, 1, 0, 0, _int
-%define CLIPD CLIPD_SSE2
VECTOR_CLIP_INT32 6, 2, 0, 1
INIT_XMM sse4
-%define CLIPD CLIPD_SSE41
%ifdef m8
VECTOR_CLIP_INT32 11, 1, 1, 0
%else
diff --git a/libavfilter/x86/yadif-16.asm b/libavfilter/x86/yadif-16.asm
index 79d127d..9053b37 100644
--- a/libavfilter/x86/yadif-16.asm
+++ b/libavfilter/x86/yadif-16.asm
@@ -54,30 +54,6 @@ SECTION .text
%endif
%endmacro
-%macro PMINSD 3
-%if cpuflag(sse4)
- pminsd %1, %2
-%else
- mova %3, %2
- pcmpgtd %3, %1
- pand %1, %3
- pandn %3, %2
- por %1, %3
-%endif
-%endmacro
-
-%macro PMAXSD 3
-%if cpuflag(sse4)
- pmaxsd %1, %2
-%else
- mova %3, %1
- pcmpgtd %3, %2
- pand %1, %3
- pandn %3, %2
- por %1, %3
-%endif
-%endmacro
-
%macro PMAXUW 2
%if cpuflag(sse4)
pmaxuw %1, %2
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index e1220df..2141912 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -357,7 +357,7 @@
%endif
%endmacro
-%macro ABSB 2 ; source mmreg, temp mmreg (unused for ssse3)
+%macro ABSB 2 ; source mmreg, temp mmreg (unused for SSSE3)
%if cpuflag(ssse3)
pabsb %1, %1
%else
@@ -381,7 +381,7 @@
%endif
%endmacro
-%macro ABSD2_MMX 4
+%macro ABSD2 4
pxor %3, %3
pxor %4, %4
pcmpgtd %3, %1
@@ -475,7 +475,7 @@
%else
palignr %1, %2, %3
%endif
-%elif cpuflag(mmx) ; [dst,] src1, src2, imm, tmp
+%else ; [dst,] src1, src2, imm, tmp
%define %%dst %1
%if %0==5
%ifnidn %1, %2
@@ -799,37 +799,47 @@
pminsw %1, %3
%endmacro
-%macro PMINSD_MMX 3 ; dst, src, tmp
+%macro PMINSD 3 ; dst, src, tmp/unused
+%if cpuflag(sse4)
+ pminsd %1, %2
+%elif cpuflag(sse2)
+ cvtdq2ps %1, %1
+ minps %1, %2
+ cvtps2dq %1, %1
+%else
mova %3, %2
pcmpgtd %3, %1
pxor %1, %2
pand %1, %3
pxor %1, %2
+%endif
%endmacro
-%macro PMAXSD_MMX 3 ; dst, src, tmp
+%macro PMAXSD 3 ; dst, src, tmp/unused
+%if cpuflag(sse4)
+ pmaxsd %1, %2
+%else
mova %3, %1
pcmpgtd %3, %2
pand %1, %3
pandn %3, %2
por %1, %3
+%endif
%endmacro
-%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
- PMINSD_MMX %1, %3, %4
- PMAXSD_MMX %1, %2, %4
-%endmacro
-
-%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
+%macro CLIPD 3-4
+%if cpuflag(sse4); src/dst, min, max, unused
+ pminsd %1, %3
+ pmaxsd %1, %2
+%elif cpuflag(sse2) ; src/dst, min (float), max (float), unused
cvtdq2ps %1, %1
minps %1, %3
maxps %1, %2
cvtps2dq %1, %1
-%endmacro
-
-%macro CLIPD_SSE41 3-4 ; src/dst, min, max, unused
- pminsd %1, %3
- pmaxsd %1, %2
+%else ; src/dst, min, max, tmp
+ PMINSD %1, %3, %4
+ PMAXSD %1, %2, %4
+%endif
%endmacro
%macro VBROADCASTSS 2 ; dst xmm/ymm, src m32/xmm
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index f978170..83cabff 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -364,15 +364,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
movd [dstq+wq*2], m0
%endif ; %3 ==/!= X
%else ; %2 == 19
-%if mmsize == 8
- PMINSD_MMX m0, m2, m4
-%elif cpuflag(sse4)
- pminsd m0, m2
-%else ; sse2/ssse3
- cvtdq2ps m0, m0
- minps m0, m2
- cvtps2dq m0, m0
-%endif ; mmx/sse2/ssse3/sse4
+ PMINSD m0, m2, m4
%ifnidn %3, X
mova [dstq+wq*(4>>wshr)], m0
%else ; %3 == X
OpenPOWER on IntegriCloud