summary | refs | log | tree | commit | diff | stats
path: root/libavfilter
diff options
context:
space:
mode:
author: Martin Vignali <martin.vignali@gmail.com> 2017-12-30 19:30:56 +0100
committer: Martin Vignali <martin.vignali@gmail.com> 2018-01-11 21:03:19 +0100
commit: b94cd55155d8c061f1e1faca9076afe540149c27 (patch)
tree: 84c7f92eeef9edaa0c9324fb836edf4fe6d77d67 /libavfilter
parent: ef21033c327a32a23c893d077148d4116c3cb0f7 (diff)
download: ffmpeg-streaming-b94cd55155d8c061f1e1faca9076afe540149c27.zip
download: ffmpeg-streaming-b94cd55155d8c061f1e1faca9076afe540149c27.tar.gz
avfilter/x86/vf_interlace : add AVX2 version
Diffstat (limited to 'libavfilter')
-rw-r--r-- libavfilter/x86/vf_interlace.asm | 23
-rw-r--r-- libavfilter/x86/vf_interlace_init.c | 12
-rw-r--r-- libavfilter/x86/vf_tinterlace_init.c | 16
3 files changed, 50 insertions, 1 deletion
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index 06b2698..a6c65b8 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -39,6 +39,20 @@ SECTION .text
pcmpeq%1 m6, m6
+ test hq, mmsize
+ je .loop
+
+ ;process 1 * mmsize
+ movu m0, [mrefq+hq]
+ pavg%1 m0, [prefq+hq]
+ pxor m0, m6
+ pxor m2, m6, [srcq+hq]
+ pavg%1 m0, m2
+ pxor m0, m6
+ mova [dstq+hq], m0
+ add hq, mmsize
+ jge .end
+
.loop:
movu m0, [mrefq+hq]
movu m1, [mrefq+hq+mmsize]
@@ -57,7 +71,9 @@ SECTION .text
add hq, 2*mmsize
jl .loop
-REP_RET
+
+.end:
+ REP_RET
%endmacro
%macro LOWPASS_LINE 0
@@ -201,5 +217,10 @@ LOWPASS_LINE
INIT_XMM avx
LOWPASS_LINE
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+LOWPASS_LINE
+%endif
+
INIT_XMM sse2
LOWPASS_LINE_COMPLEX
diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c
index b024b61..0de0fea 100644
--- a/libavfilter/x86/vf_interlace_init.c
+++ b/libavfilter/x86/vf_interlace_init.c
@@ -32,6 +32,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_avx2 (uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
@@ -39,6 +42,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx2 (uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
@@ -62,6 +68,9 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth)
if (EXTERNAL_AVX(cpu_flags))
if (s->lowpass == VLPF_LIN)
s->lowpass_line = ff_lowpass_line_16_avx;
+ if (EXTERNAL_AVX2_FAST(cpu_flags))
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_16_avx2;
} else {
if (EXTERNAL_SSE2(cpu_flags)) {
if (s->lowpass == VLPF_LIN)
@@ -72,5 +81,8 @@ av_cold void ff_interlace_init_x86(InterlaceContext *s, int depth)
if (EXTERNAL_AVX(cpu_flags))
if (s->lowpass == VLPF_LIN)
s->lowpass_line = ff_lowpass_line_avx;
+ if (EXTERNAL_AVX2_FAST(cpu_flags))
+ if (s->lowpass == VLPF_LIN)
+ s->lowpass_line = ff_lowpass_line_avx2;
}
}
diff --git a/libavfilter/x86/vf_tinterlace_init.c b/libavfilter/x86/vf_tinterlace_init.c
index 2098129..2c9b1de 100644
--- a/libavfilter/x86/vf_tinterlace_init.c
+++ b/libavfilter/x86/vf_tinterlace_init.c
@@ -33,6 +33,9 @@ void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_avx2 (uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
@@ -40,6 +43,9 @@ void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
ptrdiff_t pref, int clip_max);
+void ff_lowpass_line_16_avx2 (uint8_t *dstp, ptrdiff_t linesize,
+ const uint8_t *srcp, ptrdiff_t mref,
+ ptrdiff_t pref, int clip_max);
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
const uint8_t *srcp, ptrdiff_t mref,
@@ -63,6 +69,11 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
if (EXTERNAL_AVX(cpu_flags))
if (!(s->flags & TINTERLACE_FLAG_CVLPF))
s->lowpass_line = ff_lowpass_line_16_avx;
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF)) {
+ s->lowpass_line = ff_lowpass_line_16_avx2;
+ }
+ }
} else {
if (EXTERNAL_SSE2(cpu_flags)) {
if (!(s->flags & TINTERLACE_FLAG_CVLPF))
@@ -73,5 +84,10 @@ av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
if (EXTERNAL_AVX(cpu_flags))
if (!(s->flags & TINTERLACE_FLAG_CVLPF))
s->lowpass_line = ff_lowpass_line_avx;
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ if (!(s->flags & TINTERLACE_FLAG_CVLPF)) {
+ s->lowpass_line = ff_lowpass_line_avx2;
+ }
+ }
}
}
OpenPOWER on IntegriCloud