diff options
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/hevc_res_add.asm | 36 | ||||
-rw-r--r-- | libavcodec/x86/hevcdsp.h | 28 | ||||
-rw-r--r-- | libavcodec/x86/hevcdsp_init.c | 28 |
3 files changed, 46 insertions, 46 deletions
diff --git a/libavcodec/x86/hevc_res_add.asm b/libavcodec/x86/hevc_res_add.asm index dc3e88a..869288f 100644 --- a/libavcodec/x86/hevc_res_add.asm +++ b/libavcodec/x86/hevc_res_add.asm @@ -1,5 +1,5 @@ ; /* -; * Provide SIMD optimizations for transform_add functions for HEVC decoding +; * Provide SIMD optimizations for add_residual functions for HEVC decoding ; * Copyright (c) 2014 Pierre-Edouard LEPERE ; * ; * This file is part of FFmpeg. @@ -52,7 +52,7 @@ cextern pw_1023 INIT_MMX mmxext ; void ff_hevc_tranform_add_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) -cglobal hevc_transform_add4_8, 3, 4, 6 +cglobal hevc_add_residual4_8, 3, 4, 6 TR_ADD_MMX_4_8 add r1, 16 lea r0, [r0+r2*2] @@ -135,8 +135,8 @@ cglobal hevc_transform_add4_8, 3, 4, 6 %macro TRANSFORM_ADD_8 0 -; void ff_hevc_transform_add8_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) -cglobal hevc_transform_add8_8, 3, 4, 8 +; void ff_hevc_add_residual8_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) +cglobal hevc_add_residual8_8, 3, 4, 8 lea r3, [r2*3] TR_ADD_SSE_8_8 add r1, 64 @@ -144,8 +144,8 @@ cglobal hevc_transform_add8_8, 3, 4, 8 TR_ADD_SSE_8_8 RET -; void ff_hevc_transform_add16_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) -cglobal hevc_transform_add16_8, 3, 4, 7 +; void ff_hevc_add_residual16_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) +cglobal hevc_add_residual16_8, 3, 4, 7 pxor m0, m0 lea r3, [r2*3] TR_ADD_SSE_16_32_8 0, r0, r0+r2 @@ -158,8 +158,8 @@ cglobal hevc_transform_add16_8, 3, 4, 7 %endrep RET -; void ff_hevc_transform_add32_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) -cglobal hevc_transform_add32_8, 3, 4, 7 +; void ff_hevc_add_residual32_8_<opt>(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) +cglobal hevc_add_residual32_8, 3, 4, 7 pxor m0, m0 TR_ADD_SSE_16_32_8 0, r0, r0+16 TR_ADD_SSE_16_32_8 64, r0+r2, r0+r2+16 @@ -179,8 +179,8 @@ TRANSFORM_ADD_8 %if HAVE_AVX2_EXTERNAL INIT_YMM avx2 -; void ff_hevc_transform_add32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) -cglobal hevc_transform_add32_8, 3, 4, 7 +; void ff_hevc_add_residual32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride) +cglobal hevc_add_residual32_8, 3, 4, 7 pxor m0, m0 lea r3, [r2*3] TR_ADD_SSE_16_32_8 0, r0, r0+r2 @@ -195,7 +195,7 @@ cglobal hevc_transform_add32_8, 3, 4, 7 %endif ;----------------------------------------------------------------------------- -; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride) +; void ff_hevc_add_residual_10(pixel *dst, int16_t *block, int stride) ;----------------------------------------------------------------------------- %macro TR_ADD_SSE_8_10 4 mova m0, [%4] @@ -310,7 +310,7 @@ cglobal hevc_transform_add32_8, 3, 4, 7 INIT_MMX mmxext -cglobal hevc_transform_add4_10,3,4, 6 +cglobal hevc_add_residual4_10,3,4, 6 pxor m2, m2 mova m3, [max_pixels_10] TR_ADD_MMX4_10 r0, r2, r1 @@ -320,10 +320,10 @@ cglobal hevc_transform_add4_10,3,4, 6 RET ;----------------------------------------------------------------------------- -; void ff_hevc_transform_add_10(pixel *dst, int16_t *block, int stride) +; void ff_hevc_add_residual_10(pixel *dst, int16_t *block, int stride) ;----------------------------------------------------------------------------- INIT_XMM sse2 -cglobal hevc_transform_add8_10,3,4,6 +cglobal hevc_add_residual8_10,3,4,6 pxor m4, m4 mova m5, [max_pixels_10] lea r3, [r2*3] @@ -334,7 +334,7 @@ cglobal hevc_transform_add8_10,3,4,6 TR_ADD_SSE_8_10 r0, r2, r3, r1 RET -cglobal hevc_transform_add16_10,3,4,6 +cglobal hevc_add_residual16_10,3,4,6 pxor m4, m4 mova m5, [max_pixels_10] @@ -346,7 +346,7 @@ cglobal hevc_transform_add16_10,3,4,6 %endrep RET -cglobal hevc_transform_add32_10,3,4,6 +cglobal hevc_add_residual32_10,3,4,6 pxor m4, m4 mova m5, [max_pixels_10] @@ -361,7 +361,7 @@ cglobal hevc_transform_add32_10,3,4,6 %if HAVE_AVX2_EXTERNAL INIT_YMM avx2 -cglobal hevc_transform_add16_10,3,4,6 +cglobal hevc_add_residual16_10,3,4,6 pxor m4, m4 mova m5, [max_pixels_10] lea r3, [r2*3] @@ -374,7 +374,7 @@ cglobal hevc_transform_add16_10,3,4,6 %endrep RET -cglobal hevc_transform_add32_10,3,4,6 +cglobal hevc_add_residual32_10,3,4,6 pxor m4, m4 mova m5, [max_pixels_10] diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h index ad8168f..3cfdc27 100644 --- a/libavcodec/x86/hevcdsp.h +++ b/libavcodec/x86/hevcdsp.h @@ -239,23 +239,23 @@ WEIGHTING_PROTOTYPES(12, sse4); /////////////////////////////////////////////////////////////////////////////// // TRANSFORM_ADD /////////////////////////////////////////////////////////////////////////////// -void ff_hevc_transform_add4_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add32_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual4_8_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual8_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual16_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual32_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add8_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add16_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add32_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual8_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual16_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual32_8_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual32_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add4_10_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add8_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add16_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add32_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual4_10_mmxext(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual8_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual16_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual32_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add16_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); -void ff_hevc_transform_add32_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual16_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); +void ff_hevc_add_residual32_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride); #endif // AVCODEC_X86_HEVCDSP_H diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index 09eb06d..da73d76 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -700,7 +700,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_MMXEXT(cpu_flags)) { c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext; c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext; - c->transform_add[0] = ff_hevc_transform_add4_8_mmxext; + c->add_residual[0] = ff_hevc_add_residual4_8_mmxext; } if (EXTERNAL_SSE2(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2; @@ -716,9 +716,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2; c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2; - c->transform_add[1] = ff_hevc_transform_add8_8_sse2; - c->transform_add[2] = ff_hevc_transform_add16_8_sse2; - c->transform_add[3] = ff_hevc_transform_add32_8_sse2; + c->add_residual[1] = ff_hevc_add_residual8_8_sse2; + c->add_residual[2] = ff_hevc_add_residual16_8_sse2; + c->add_residual[3] = ff_hevc_add_residual32_8_sse2; } if (EXTERNAL_SSSE3(cpu_flags)) { if(ARCH_X86_64) { @@ -748,9 +748,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) } SAO_BAND_INIT(8, avx); - c->transform_add[1] = ff_hevc_transform_add8_8_avx; - c->transform_add[2] = ff_hevc_transform_add16_8_avx; - c->transform_add[3] = ff_hevc_transform_add32_8_avx; + c->add_residual[1] = ff_hevc_add_residual8_8_avx; + c->add_residual[2] = ff_hevc_add_residual16_8_avx; + c->add_residual[3] = ff_hevc_add_residual32_8_avx; } if (EXTERNAL_AVX2(cpu_flags)) { c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2; @@ -850,11 +850,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_8_avx2; c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_8_avx2; - c->transform_add[3] = ff_hevc_transform_add32_8_avx2; + c->add_residual[3] = ff_hevc_add_residual32_8_avx2; } } else if (bit_depth == 10) { if (EXTERNAL_MMXEXT(cpu_flags)) { - c->transform_add[0] = ff_hevc_transform_add4_10_mmxext; + c->add_residual[0] = ff_hevc_add_residual4_10_mmxext; c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext; c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext; } @@ -872,9 +872,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2; c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2; - c->transform_add[1] = ff_hevc_transform_add8_10_sse2; - c->transform_add[2] = ff_hevc_transform_add16_10_sse2; - c->transform_add[3] = ff_hevc_transform_add32_10_sse2; + c->add_residual[1] = ff_hevc_add_residual8_10_sse2; + c->add_residual[2] = ff_hevc_add_residual16_10_sse2; + c->add_residual[3] = ff_hevc_add_residual32_10_sse2; } if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3; @@ -1053,8 +1053,8 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) SAO_BAND_INIT(10, avx2); SAO_EDGE_INIT(10, avx2); - c->transform_add[2] = ff_hevc_transform_add16_10_avx2; - c->transform_add[3] = ff_hevc_transform_add32_10_avx2; + c->add_residual[2] = ff_hevc_add_residual16_10_avx2; + c->add_residual[3] = ff_hevc_add_residual32_10_avx2; } } else if (bit_depth == 12) { |