diff options
Diffstat (limited to 'libavcodec/x86/h264_idct_10bit.asm')
-rw-r--r-- | libavcodec/x86/h264_idct_10bit.asm | 86 |
1 files changed, 77 insertions, 9 deletions
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm index 432d74b..9fd05ab 100644 --- a/libavcodec/x86/h264_idct_10bit.asm +++ b/libavcodec/x86/h264_idct_10bit.asm @@ -5,32 +5,31 @@ ;* ;* Authors: Daniel Kang <daniel.d.kang@gmail.com> ;* -;* This file is part of Libav. +;* This file is part of FFmpeg. ;* -;* Libav is free software; you can redistribute it and/or +;* FFmpeg is free software; you can redistribute it and/or ;* modify it under the terms of the GNU Lesser General Public ;* License as published by the Free Software Foundation; either ;* version 2.1 of the License, or (at your option) any later version. ;* -;* Libav is distributed in the hope that it will be useful, +;* FFmpeg is distributed in the hope that it will be useful, ;* but WITHOUT ANY WARRANTY; without even the implied warranty of ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;* Lesser General Public License for more details. ;* ;* You should have received a copy of the GNU Lesser General Public -;* License along with Libav; if not, write to the Free Software +;* License along with FFmpeg; if not, write to the Free Software ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;****************************************************************************** %include "libavutil/x86/x86util.asm" -SECTION_RODATA - -pw_pixel_max: times 8 dw ((1 << 10)-1) -pd_32: times 4 dd 32 - SECTION .text +cextern pw_1023 +%define pw_pixel_max pw_1023 +cextern pd_32 + ;----------------------------------------------------------------------------- ; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride) ;----------------------------------------------------------------------------- @@ -84,8 +83,10 @@ cglobal h264_idct_add_10, 3,3 INIT_XMM sse2 IDCT_ADD_10 +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT_ADD_10 +%endif ;----------------------------------------------------------------------------- ; void ff_h264_idct_add16_10(pixel *dst, const int *block_offset, @@ -118,9 +119,11 @@ add4x4_idct %+ SUFFIX: INIT_XMM sse2 ALIGN 16 ADD4x4IDCT +%if HAVE_AVX_EXTERNAL INIT_XMM avx ALIGN 16 ADD4x4IDCT +%endif %macro ADD16_OP 2 cmp byte [r4+%2], 0 @@ -157,8 +160,10 @@ cglobal h264_idct_add16_10, 5,6 INIT_XMM sse2 IDCT_ADD16_10 +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT_ADD16_10 +%endif ;----------------------------------------------------------------------------- ; void ff_h264_idct_dc_add_10(pixel *dst, int16_t *block, int stride) @@ -224,8 +229,10 @@ cglobal h264_idct8_dc_add_10,3,4,7 INIT_XMM sse2 IDCT8_DC_ADD +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT8_DC_ADD +%endif ;----------------------------------------------------------------------------- ; void ff_h264_idct_add16intra_10(pixel *dst, const int *block_offset, @@ -298,8 +305,10 @@ cglobal h264_idct_add16intra_10,5,7,8 INIT_XMM sse2 IDCT_ADD16INTRA_10 +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT_ADD16INTRA_10 +%endif %assign last_block 36 ;----------------------------------------------------------------------------- @@ -336,8 +345,63 @@ cglobal h264_idct_add8_10,5,8,7 INIT_XMM sse2 IDCT_ADD8 +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT_ADD8 +%endif + +;----------------------------------------------------------------------------- +; void ff_h264_idct_add8_422_10(pixel **dst, const int *block_offset, +; int16_t *block, int stride, +; const uint8_t nnzc[6*8]) +;----------------------------------------------------------------------------- +%assign last_block 44 + +%macro IDCT_ADD8_422 0 + +cglobal h264_idct_add8_422_10, 5, 8, 7 + movsxdifnidn r3, r3d +%if ARCH_X86_64 + mov r7, r0 +%endif + + add r2, 1024 + mov r0, [r0] + ADD16_OP_INTRA 16, 4+ 6*8 + ADD16_OP_INTRA 18, 4+ 7*8 + ADD16_OP_INTRA 24, 4+ 8*8 ; i+4 + ADD16_OP_INTRA 26, 4+ 9*8 ; i+4 + add r2, 1024-128*4 + +%if ARCH_X86_64 + mov r0, [r7+gprsize] +%else + mov r0, r0m + mov r0, [r0+gprsize] +%endif + + ADD16_OP_INTRA 32, 4+11*8 + ADD16_OP_INTRA 34, 4+12*8 + ADD16_OP_INTRA 40, 4+13*8 ; i+4 + ADD16_OP_INTRA 42, 4+14*8 ; i+4 +REP_RET + AC 16 + AC 18 + AC 24 ; i+4 + AC 26 ; i+4 + AC 32 + AC 34 + AC 40 ; i+4 + AC 42 ; i+4 + +%endmacro + +INIT_XMM sse2 +IDCT_ADD8_422 +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +IDCT_ADD8_422 +%endif ;----------------------------------------------------------------------------- ; void ff_h264_idct8_add_10(pixel *dst, int16_t *block, int stride) @@ -544,8 +608,10 @@ h264_idct8_add1_10 %+ SUFFIX: INIT_XMM sse2 IDCT8_ADD +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT8_ADD +%endif ;----------------------------------------------------------------------------- ; void ff_h264_idct8_add4_10(pixel **dst, const int *block_offset, @@ -585,5 +651,7 @@ cglobal h264_idct8_add4_10, 0,7,16 INIT_XMM sse2 IDCT8_ADD4 +%if HAVE_AVX_EXTERNAL INIT_XMM avx IDCT8_ADD4 +%endif |