summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Almer <jamrial@gmail.com>2017-10-21 12:28:39 -0300
committerJames Almer <jamrial@gmail.com>2017-10-21 12:28:39 -0300
commit53eea3a5693af41ccb281fccb5a37905f522b9ad (patch)
treeb720d9a363067e5fc0e7ce5c4842ecc9a062fe17
parent2904db90458a1253e4aea6844ba9a59ac11923b6 (diff)
parent307eb1a8ee363db1fcf869e427a8deb6d9538881 (diff)
downloadffmpeg-streaming-53eea3a5693af41ccb281fccb5a37905f522b9ad.zip
ffmpeg-streaming-53eea3a5693af41ccb281fccb5a37905f522b9ad.tar.gz
Merge commit '307eb1a8ee363db1fcf869e427a8deb6d9538881'
* commit '307eb1a8ee363db1fcf869e427a8deb6d9538881': x86: vp8dsp: port FILTER_BILINEAR macro to cpuflags Merged-by: James Almer <jamrial@gmail.com>
-rw-r--r--libavcodec/x86/vp8dsp.asm143
1 files changed, 66 insertions, 77 deletions
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index e303b80..75de569 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -664,6 +664,37 @@ INIT_XMM sse2
FILTER_V 8
%macro FILTER_BILINEAR 1
+%if cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
+ shl myd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m3, [bilinear_filter_vb+myq-16]
+.nextrow:
+ movh m0, [srcq+srcstrideq*0]
+ movh m1, [srcq+srcstrideq*1]
+ movh m2, [srcq+srcstrideq*2]
+ punpcklbw m0, m1
+ punpcklbw m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
%ifdef PIC
@@ -701,6 +732,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
+%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@@ -708,6 +740,37 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
jg .nextrow
REP_RET
+%if cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
+ shl mxd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m2, [filter_h2_shuf]
+ mova m3, [bilinear_filter_vb+mxq-16]
+.nextrow:
+ movu m0, [srcq+srcstrideq*0]
+ movu m1, [srcq+srcstrideq*1]
+ pshufb m0, m2
+ pshufb m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
%ifdef PIC
@@ -746,6 +809,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
+%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@@ -758,85 +822,10 @@ INIT_MMX mmxext
FILTER_BILINEAR 4
INIT_XMM sse2
FILTER_BILINEAR 8
-
-%macro FILTER_BILINEAR_SSSE3 1
-cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
- shl myd, 4
-%ifdef PIC
- lea picregq, [bilinear_filter_vb_m]
-%endif
- pxor m4, m4
- mova m3, [bilinear_filter_vb+myq-16]
-.nextrow:
- movh m0, [srcq+srcstrideq*0]
- movh m1, [srcq+srcstrideq*1]
- movh m2, [srcq+srcstrideq*2]
- punpcklbw m0, m1
- punpcklbw m1, m2
- pmaddubsw m0, m3
- pmaddubsw m1, m3
- psraw m0, 2
- psraw m1, 2
- pavgw m0, m4
- pavgw m1, m4
-%if mmsize==8
- packuswb m0, m0
- packuswb m1, m1
- movh [dstq+dststrideq*0], m0
- movh [dstq+dststrideq*1], m1
-%else
- packuswb m0, m1
- movh [dstq+dststrideq*0], m0
- movhps [dstq+dststrideq*1], m0
-%endif
-
- lea dstq, [dstq+dststrideq*2]
- lea srcq, [srcq+srcstrideq*2]
- sub heightd, 2
- jg .nextrow
- REP_RET
-
-cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
- shl mxd, 4
-%ifdef PIC
- lea picregq, [bilinear_filter_vb_m]
-%endif
- pxor m4, m4
- mova m2, [filter_h2_shuf]
- mova m3, [bilinear_filter_vb+mxq-16]
-.nextrow:
- movu m0, [srcq+srcstrideq*0]
- movu m1, [srcq+srcstrideq*1]
- pshufb m0, m2
- pshufb m1, m2
- pmaddubsw m0, m3
- pmaddubsw m1, m3
- psraw m0, 2
- psraw m1, 2
- pavgw m0, m4
- pavgw m1, m4
-%if mmsize==8
- packuswb m0, m0
- packuswb m1, m1
- movh [dstq+dststrideq*0], m0
- movh [dstq+dststrideq*1], m1
-%else
- packuswb m0, m1
- movh [dstq+dststrideq*0], m0
- movhps [dstq+dststrideq*1], m0
-%endif
-
- lea dstq, [dstq+dststrideq*2]
- lea srcq, [srcq+srcstrideq*2]
- sub heightd, 2
- jg .nextrow
- REP_RET
-%endmacro
-
INIT_MMX ssse3
-FILTER_BILINEAR_SSSE3 4
+FILTER_BILINEAR 4
INIT_XMM ssse3
-FILTER_BILINEAR_SSSE3 8
+FILTER_BILINEAR 8
INIT_MMX mmx
cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
OpenPOWER on IntegriCloud