diff options
author | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-07-03 00:48:12 +0000 |
---|---|---|
committer | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-07-03 00:48:12 +0000 |
commit | b06855f18a79d45b5ca212be89d84df3ee130cf7 (patch) | |
tree | 6f7f2c1e7d88cf954692d4d60ada26ea0a1e9232 /libavcodec | |
parent | 9862f9e149a20cca676b00dde0e01a73eda87ee0 (diff) | |
download | ffmpeg-streaming-b06855f18a79d45b5ca212be89d84df3ee130cf7.zip ffmpeg-streaming-b06855f18a79d45b5ca212be89d84df3ee130cf7.tar.gz |
Add SSSE3 versions of the VP8 width-4 bilinear motion compensation (MC) functions.
Originally committed as revision 24013 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/vp8dsp-init.c | 13 | ||||
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 25 |
2 files changed, 34 insertions, 4 deletions
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c index 6247da9..698d394 100644 --- a/libavcodec/x86/vp8dsp-init.c +++ b/libavcodec/x86/vp8dsp-init.c @@ -85,6 +85,12 @@ extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride, extern void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, int dststride, uint8_t *src, int srcstride, int height, int mx, int my); +extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, int dststride, + uint8_t *src, int srcstride, + int height, int mx, int my); +extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride, + uint8_t *src, int srcstride, + int height, int mx, int my); extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride, uint8_t *src, int srcstride, @@ -92,13 +98,14 @@ extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride, extern void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, int dststride, uint8_t *src, int srcstride, int height, int mx, int my); -extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride, +extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, int dststride, uint8_t *src, int srcstride, int height, int mx, int my); -extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride, +extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride, uint8_t *src, int srcstride, int height, int mx, int my); + extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride, uint8_t *src, int srcstride, int height, int mx, int my); @@ -207,6 +214,7 @@ HVBILIN(mmxext, 8, 8, 16) HVBILIN(mmxext, 8, 16, 16) HVBILIN(sse2, 8, 8, 16) HVBILIN(sse2, 8, 16, 16) +HVBILIN(ssse3, 8, 4, 8) HVBILIN(ssse3, 8, 8, 16) HVBILIN(ssse3, 8, 16, 16) @@ -284,6 +292,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c) VP8_MC_FUNC(2, 4, ssse3); VP8_BILINEAR_MC_FUNC(0, 16, ssse3); VP8_BILINEAR_MC_FUNC(1, 8, ssse3); + VP8_BILINEAR_MC_FUNC(2, 4, ssse3); } if (mm_flags & FF_MM_SSE4) { diff --git a/libavcodec/x86/vp8dsp.asm 
b/libavcodec/x86/vp8dsp.asm index 3ac9ca9..2c3eee4 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -770,7 +770,8 @@ FILTER_BILINEAR mmxext, 4, 0 INIT_XMM FILTER_BILINEAR sse2, 8, 7 -cglobal put_vp8_bilinear8_v_ssse3, 7,7,5 +%macro FILTER_BILINEAR_SSSE3 1 +cglobal put_vp8_bilinear%1_v_ssse3, 7,7 shl r6d, 4 %ifdef PIC lea r11, [bilinear_filter_vb_m] @@ -789,9 +790,16 @@ cglobal put_vp8_bilinear8_v_ssse3, 7,7,5 psraw m1, 2 pavgw m0, m4 pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [r0+r1*0], m0 + movh [r0+r1*1], m1 +%else packuswb m0, m1 movh [r0+r1*0], m0 movhps [r0+r1*1], m0 +%endif lea r0, [r0+r1*2] lea r2, [r2+r3*2] @@ -799,7 +807,7 @@ cglobal put_vp8_bilinear8_v_ssse3, 7,7,5 jg .nextrow REP_RET -cglobal put_vp8_bilinear8_h_ssse3, 7,7,5 +cglobal put_vp8_bilinear%1_h_ssse3, 7,7 shl r5d, 4 %ifdef PIC lea r11, [bilinear_filter_vb_m] @@ -818,15 +826,28 @@ cglobal put_vp8_bilinear8_h_ssse3, 7,7,5 psraw m1, 2 pavgw m0, m4 pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [r0+r1*0], m0 + movh [r0+r1*1], m1 +%else packuswb m0, m1 movh [r0+r1*0], m0 movhps [r0+r1*1], m0 +%endif lea r0, [r0+r1*2] lea r2, [r2+r3*2] sub r4, 2 jg .nextrow REP_RET +%endmacro + +INIT_MMX +FILTER_BILINEAR_SSSE3 4 +INIT_XMM +FILTER_BILINEAR_SSSE3 8 cglobal put_vp8_pixels8_mmx, 5,5 .nextrow: |