diff options
author | Thomas Mundt <tmundt75@gmail.com> | 2017-09-19 22:23:23 +0200 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-09-23 16:19:58 -0300 |
commit | 40bfaa190c61b6eeff1b76b767c12edd6609967d (patch) | |
tree | 533340612ea536e60bd9189fb110772e4513a49a /libavfilter/x86/vf_interlace.asm | |
parent | 58ca446672fec10e851b820ce7df64bd2d1f3a70 (diff) | |
download | ffmpeg-streaming-40bfaa190c61b6eeff1b76b767c12edd6609967d.zip ffmpeg-streaming-40bfaa190c61b6eeff1b76b767c12edd6609967d.tar.gz |
avfilter/interlace: add support for 10 and 12 bit
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Thomas Mundt <tmundt75@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavfilter/x86/vf_interlace.asm')
-rw-r--r-- | libavfilter/x86/vf_interlace.asm | 80 |
1 files changed, 73 insertions, 7 deletions
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm index d0fffd2..7c0065d 100644 --- a/libavfilter/x86/vf_interlace.asm +++ b/libavfilter/x86/vf_interlace.asm @@ -30,27 +30,26 @@ pw_4: times 8 dw 4 SECTION .text -%macro LOWPASS_LINE 0 -cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref +%macro LOWPASS 1 add dstq, hq add srcq, hq add mrefq, srcq add prefq, srcq neg hq - pcmpeqb m6, m6 + pcmpeq%1 m6, m6 .loop: mova m0, [mrefq+hq] mova m1, [mrefq+hq+mmsize] - pavgb m0, [prefq+hq] - pavgb m1, [prefq+hq+mmsize] + pavg%1 m0, [prefq+hq] + pavg%1 m1, [prefq+hq+mmsize] pxor m0, m6 pxor m1, m6 pxor m2, m6, [srcq+hq] pxor m3, m6, [srcq+hq+mmsize] - pavgb m0, m2 - pavgb m1, m3 + pavg%1 m0, m2 + pavg%1 m1, m3 pxor m0, m6 pxor m1, m6 mova [dstq+hq], m0 @@ -59,7 +58,15 @@ cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref add hq, 2*mmsize jl .loop REP_RET +%endmacro + +%macro LOWPASS_LINE 0 +cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref + LOWPASS b +cglobal lowpass_line_16, 5, 5, 7, dst, h, src, mref, pref + shl hq, 1 + LOWPASS w %endmacro %macro LOWPASS_LINE_COMPLEX 0 @@ -124,6 +131,65 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref jg .loop REP_RET +cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max + movd m7, DWORD clip_maxm + SPLATW m7, m7, 0 + mova [rsp], m7 +.loop: + mova m0, [srcq+mrefq] + mova m1, [srcq+mrefq+mmsize] + mova m2, [srcq+prefq] + mova m3, [srcq+prefq+mmsize] + paddw m0, m2 + paddw m1, m3 + mova m6, m0 + mova m7, m1 + mova m2, [srcq] + mova m3, [srcq+mmsize] + paddw m0, m2 + paddw m1, m3 + psllw m2, 1 + psllw m3, 1 + paddw m0, m2 + paddw m1, m3 + psllw m0, 1 + psllw m1, 1 + pcmpgtw m6, m2 + pcmpgtw m7, m3 + mova m2, [srcq+2*mrefq] + mova m3, [srcq+2*mrefq+mmsize] + mova m4, [srcq+2*prefq] + mova m5, [srcq+2*prefq+mmsize] + paddw m2, m4 + paddw m3, m5 + paddw m0, [pw_4] + paddw m1, [pw_4] + psubusw m0, m2 + psubusw m1, m3 + psrlw m0, 3 + psrlw m1, 3 + pminsw m0, [rsp] + pminsw m1, [rsp] + mova m2, m0 + mova m3, m1 + pmaxsw m0, [srcq] + pmaxsw m1, [srcq+mmsize] + pminsw m2, [srcq] + pminsw m3, [srcq+mmsize] + pand m0, m6 + pand m1, m7 + pandn m6, m2 + pandn m7, m3 + por m0, m6 + por m1, m7 + mova [dstq], m0 + mova [dstq+mmsize], m1 + + add dstq, 2*mmsize + add srcq, 2*mmsize + sub hd, mmsize + jg .loop +REP_RET %endmacro INIT_XMM sse2 |