diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-12-14 08:09:18 -0500 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2013-12-14 12:13:26 -0500 |
commit | 8d4c616fc05f2f3c76d13594788129df72069f30 (patch) | |
tree | 97df1c49ee90fc77f708641e3d46297be84c8597 /libavcodec/x86/vp9dsp_init.c | |
parent | c65fe9e9822cf2a04e5507ddbb7f99e4b6cd93e9 (diff) | |
download | ffmpeg-streaming-8d4c616fc05f2f3c76d13594788129df72069f30.zip ffmpeg-streaming-8d4c616fc05f2f3c76d13594788129df72069f30.tar.gz |
vp9/x86: idct_add_16x16_ssse3.
Currently only the DC-only and full 16x16 variants are implemented; other
sub-forms will follow in the near future. Total decoding time of
ped1080p.webm goes from 9.7 to 9.3 seconds. The DC-only case goes from
957 to 131 cycles, and the full IDCT goes from ~4050 to ~745 cycles.
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- | libavcodec/x86/vp9dsp_init.c | 5 |
1 files changed, 4 insertions, 1 deletions
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 3c02520..5c31db6 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -153,6 +153,7 @@ filters_8tap_1d_fn3(avg)
 
 void ff_vp9_idct_idct_4x4_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
 void ff_vp9_idct_idct_8x8_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+void ff_vp9_idct_idct_16x16_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
 
 #endif /* HAVE_YASM */
 
@@ -208,8 +209,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
         init_subpel3(0, put, ssse3);
         init_subpel3(1, avg, ssse3);
         dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
-        if (ARCH_X86_64)
+        if (ARCH_X86_64) {
             dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
+            dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3;
+        }
     }
 
 #undef init_fpel