diff options
author | Martin Vignali <martin.vignali@gmail.com> | 2017-12-07 22:01:54 +0100 |
---|---|---|
committer | Martin Vignali <martin.vignali@gmail.com> | 2017-12-09 14:47:09 +0100 |
commit | 869efbf971208faccfdd88680178afaf5b1d4e77 (patch) | |
tree | 5fd74b9eadc7f110be8596cd8f9dc7769aa2066b /libavfilter/x86/vf_threshold.asm | |
parent | 713f9c5b5d646c4be55b04d691bac21ecbd74089 (diff) | |
download | ffmpeg-streaming-869efbf971208faccfdd88680178afaf5b1d4e77.zip ffmpeg-streaming-869efbf971208faccfdd88680178afaf5b1d4e77.tar.gz |
avfilter/x86/vf_threshold : add threshold16 SIMD (SSE4 and AVX2)
Diffstat (limited to 'libavfilter/x86/vf_threshold.asm')
-rw-r--r-- | libavfilter/x86/vf_threshold.asm | 21 |
1 file changed, 14 insertions, 7 deletions
```diff
diff --git a/libavfilter/x86/vf_threshold.asm b/libavfilter/x86/vf_threshold.asm
index 56a6c24..098069b 100644
--- a/libavfilter/x86/vf_threshold.asm
+++ b/libavfilter/x86/vf_threshold.asm
@@ -25,16 +25,18 @@
 SECTION_RODATA
 
 pb_128: times 16 db 128
+pb_128_0 : times 8 db 0, 128
 
 SECTION .text
 
-%macro THRESHOLD_8 0
+;%1 depth (8 or 16) ; %2 b or w ; %3 constant
+%macro THRESHOLD 3
 %if ARCH_X86_64
-cglobal threshold8, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
+cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
     mov wd, dword wm
     mov hd, dword hm
 %else
-cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
+cglobal threshold%1, 5, 7, 5, in, threshold, min, max, out, w, x
     mov wd, r10m
 %define ilinesizeq r5mp
 %define tlinesizeq r6mp
@@ -43,7 +45,10 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
 %define olinesizeq r9mp
 %define hd r11mp
 %endif
-    VBROADCASTI128 m4, [pb_128]
+    VBROADCASTI128 m4, [%3]
+%if %1 == 16
+    add wq, wq ; w *= 2 (16 bits instead of 8)
+%endif
     add inq, wq
     add thresholdq, wq
     add minq, wq
@@ -60,7 +65,7 @@ cglobal threshold8, 5, 7, 5, in, threshold, min, max, out, w, x
     movu m3, [maxq + xq]
     pxor m0, m4
     pxor m1, m4
-    pcmpgtb m0, m1
+    pcmpgt%2 m0, m1
     PBLENDVB m3, m2, m0
     movu [outq + xq], m3
     add xq, mmsize
@@ -77,9 +82,11 @@ RET
 %endmacro
 
 INIT_XMM sse4
-THRESHOLD_8
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
 
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
-THRESHOLD_8
+THRESHOLD 8, b, pb_128
+THRESHOLD 16, w, pb_128_0
 %endif
```