summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/x86/vp9dsp_init.c
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com> 2013-12-14 08:09:18 -0500
committer: Ronald S. Bultje <rsbultje@gmail.com> 2013-12-14 12:13:26 -0500
commit: 8d4c616fc05f2f3c76d13594788129df72069f30 (patch)
tree: 97df1c49ee90fc77f708641e3d46297be84c8597 /libavcodec/x86/vp9dsp_init.c
parent: c65fe9e9822cf2a04e5507ddbb7f99e4b6cd93e9 (diff)
download: ffmpeg-streaming-8d4c616fc05f2f3c76d13594788129df72069f30.zip
download: ffmpeg-streaming-8d4c616fc05f2f3c76d13594788129df72069f30.tar.gz
vp9/x86: idct_add_16x16_ssse3.
Currently only the DC-only and full 16x16 variants are implemented; other subforms will follow in the near future. Total decoding time of ped1080p.webm goes from 9.7 to 9.3 seconds. The DC-only case goes from 957 to 131 cycles, and the full IDCT goes from ~4050 to ~745 cycles.
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- libavcodec/x86/vp9dsp_init.c 5
1 file changed, 4 insertions, 1 deletion
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 3c02520..5c31db6 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -153,6 +153,7 @@ filters_8tap_1d_fn3(avg)
void ff_vp9_idct_idct_4x4_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
void ff_vp9_idct_idct_8x8_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+void ff_vp9_idct_idct_16x16_add_ssse3(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
#endif /* HAVE_YASM */
@@ -208,8 +209,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_subpel3(0, put, ssse3);
init_subpel3(1, avg, ssse3);
dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
- if (ARCH_X86_64)
+ if (ARCH_X86_64) {
dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
+ dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3;
+ }
}
#undef init_fpel
OpenPOWER on IntegriCloud