diff options
author | James Almer <jamrial@gmail.com> | 2017-10-24 19:10:22 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-10-24 19:10:22 -0300 |
commit | c0683dce89eceaab8783b8b47dd2346afc0a9276 (patch) | |
tree | e3371ed39fef828d16303befe75fd025a0c914c4 /libavcodec/arm/hevcdsp_init_neon.c | |
parent | 59b00ffea3c90b41f6e83f8184068657fed112dc (diff) | |
parent | 0b9a237b2386ff84a6f99716bd58fa27a1b767e7 (diff) | |
download | ffmpeg-streaming-c0683dce89eceaab8783b8b47dd2346afc0a9276.zip ffmpeg-streaming-c0683dce89eceaab8783b8b47dd2346afc0a9276.tar.gz |
Merge commit '0b9a237b2386ff84a6f99716bd58fa27a1b767e7'
* commit '0b9a237b2386ff84a6f99716bd58fa27a1b767e7':
hevc: Add NEON 4x4 and 8x8 IDCT
[15:12:59] <@ubitux> hevc_idct_4x4_8_c: 389.1
[15:13:00] <@ubitux> hevc_idct_4x4_8_neon: 126.6
[15:13:02] <@ubitux> our ^
[15:13:06] <@ubitux> hevc_idct_4x4_8_c: 389.3
[15:13:08] <@ubitux> hevc_idct_4x4_8_neon: 107.8
[15:13:10] <@ubitux> hevc_idct_4x4_10_c: 418.6
[15:13:12] <@ubitux> hevc_idct_4x4_10_neon: 108.1
[15:13:14] <@ubitux> libav ^
[15:13:30] <@ubitux> so yeah, we can probably trash our versions here
Merged-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/arm/hevcdsp_init_neon.c')
-rw-r--r-- | libavcodec/arm/hevcdsp_init_neon.c | 17 |
1 files changed, 12 insertions, 5 deletions
diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c index 1a3912c..8bc430e 100644 --- a/libavcodec/arm/hevcdsp_init_neon.c +++ b/libavcodec/arm/hevcdsp_init_neon.c @@ -27,8 +27,10 @@ void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); void ff_hevc_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q); void ff_hevc_h_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q); -void ff_hevc_transform_4x4_neon_8(int16_t *coeffs, int col_limit); -void ff_hevc_transform_8x8_neon_8(int16_t *coeffs, int col_limit); +void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit); +void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit); void ff_hevc_idct_4x4_dc_neon_8(int16_t *coeffs); void ff_hevc_idct_8x8_dc_neon_8(int16_t *coeffs); void ff_hevc_idct_16x16_dc_neon_8(int16_t *coeffs); @@ -142,7 +144,7 @@ void ff_hevc_put_qpel_bi_neon_wrapper(uint8_t *dst, ptrdiff_t dststride, uint8_t put_hevc_qpel_uw_neon[my][mx](dst, dststride, src, srcstride, width, height, src2, MAX_PB_SIZE); } -av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) +av_cold void ff_hevc_dsp_init_neon(HEVCDSPContext *c, const int bit_depth) { if (bit_depth == 8) { int x; @@ -150,8 +152,8 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_neon; c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_neon; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_neon; - c->idct[0] = ff_hevc_transform_4x4_neon_8; - c->idct[1] = ff_hevc_transform_8x8_neon_8; + c->idct[0] = ff_hevc_idct_4x4_8_neon; + c->idct[1] = ff_hevc_idct_8x8_8_neon; c->idct_dc[0] = ff_hevc_idct_4x4_dc_neon_8; c->idct_dc[1] = ff_hevc_idct_8x8_dc_neon_8; c->idct_dc[2] = ff_hevc_idct_16x16_dc_neon_8; @@ -221,4 +223,9 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_qpel_uw_pixels_w48_neon_8; c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_qpel_uw_pixels_w64_neon_8; } + + if (bit_depth == 10) { + c->idct[0] = ff_hevc_idct_4x4_10_neon; + c->idct[1] = ff_hevc_idct_8x8_10_neon; + } } |