summary | refs | log | tree | commit | diff | stats
path: root/libavfilter/x86/vf_threshold.asm
diff options
context:
space:
mode:
author: Martin Vignali <martin.vignali@gmail.com> 2017-12-07 22:01:54 +0100
committer: Martin Vignali <martin.vignali@gmail.com> 2017-12-09 14:47:09 +0100
commit: 869efbf971208faccfdd88680178afaf5b1d4e77 (patch)
tree: 5fd74b9eadc7f110be8596cd8f9dc7769aa2066b /libavfilter/x86/vf_threshold.asm
parent: 713f9c5b5d646c4be55b04d691bac21ecbd74089 (diff)
download: ffmpeg-streaming-869efbf971208faccfdd88680178afaf5b1d4e77.zip
download: ffmpeg-streaming-869efbf971208faccfdd88680178afaf5b1d4e77.tar.gz
avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 and AVX2)
Diffstat (limited to 'libavfilter/x86/vf_threshold.asm')
-rw-r--r-- libavfilter/x86/vf_threshold.asm 21
1 files changed, 14 insertions, 7 deletions
diff --git a/libavfilter/x86/vf_threshold.asm b/libavfilter/x86/vf_threshold.asm
index 56a6c24..098069b 100644
--- a/libavfilter/x86/vf_threshold.asm
+++ b/libavfilter/x86/vf_threshold.asm
@@ -25,16 +25,18 @@
SECTION_RODATA
pb_128: times 16 db 128
+pb_128_0 : times 8 db 0, 128
SECTION .text
-%macro THRESHOLD_8 0
+;%1 depth (8 or 16) ; %2 b or w ; %3 constant
+%macro THRESHOLD 3
%if ARCH_X86_64
-cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
+cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
mov wd, dword wm
mov hd, dword hm
%else
-cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
+cglobal threshold%1, 5, 7, 5, in, threshold, min, max, out, w, x
mov wd, r10m
%define ilinesizeq r5mp
%define tlinesizeq r6mp
@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
%define olinesizeq r9mp
%define hd r11mp
%endif
- VBROADCASTI128 m4, [pb_128]
+ VBROADCASTI128 m4, [%3]
+%if %1 == 16
+ add wq, wq ; w *= 2 (16 bits instead of 8)
+%endif
add inq, wq
add thresholdq, wq
add minq, wq
@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
movu m3, [maxq + xq]
pxor m0, m4
pxor m1, m4
- pcmpgtb m0, m1
+ pcmpgt%2 m0, m1
PBLENDVB m3, m2, m0
movu [outq + xq], m3
add xq, mmsize
@@ -77,9 +82,11 @@ RET
%endmacro
INIT_XMM sse4
-THRESHOLD_8
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
-THRESHOLD_8
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
%endif
OpenPOWER on IntegriCloud