summaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/h264_idct_10bit.asm
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86/h264_idct_10bit.asm')
-rw-r--r--libavcodec/x86/h264_idct_10bit.asm86
1 files changed, 77 insertions, 9 deletions
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 432d74b..9fd05ab 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -5,32 +5,31 @@
;*
;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "libavutil/x86/x86util.asm"
-SECTION_RODATA
-
-pw_pixel_max: times 8 dw ((1 << 10)-1)
-pd_32: times 4 dd 32
-
SECTION .text
+cextern pw_1023
+%define pw_pixel_max pw_1023
+cextern pd_32
+
;-----------------------------------------------------------------------------
; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
@@ -84,8 +83,10 @@ cglobal h264_idct_add_10, 3,3
INIT_XMM sse2
IDCT_ADD_10
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD_10
+%endif
;-----------------------------------------------------------------------------
; void ff_h264_idct_add16_10(pixel *dst, const int *block_offset,
@@ -118,9 +119,11 @@ add4x4_idct %+ SUFFIX:
INIT_XMM sse2
ALIGN 16
ADD4x4IDCT
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
ALIGN 16
ADD4x4IDCT
+%endif
%macro ADD16_OP 2
cmp byte [r4+%2], 0
@@ -157,8 +160,10 @@ cglobal h264_idct_add16_10, 5,6
INIT_XMM sse2
IDCT_ADD16_10
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD16_10
+%endif
;-----------------------------------------------------------------------------
; void ff_h264_idct_dc_add_10(pixel *dst, int16_t *block, int stride)
@@ -224,8 +229,10 @@ cglobal h264_idct8_dc_add_10,3,4,7
INIT_XMM sse2
IDCT8_DC_ADD
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT8_DC_ADD
+%endif
;-----------------------------------------------------------------------------
; void ff_h264_idct_add16intra_10(pixel *dst, const int *block_offset,
@@ -298,8 +305,10 @@ cglobal h264_idct_add16intra_10,5,7,8
INIT_XMM sse2
IDCT_ADD16INTRA_10
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD16INTRA_10
+%endif
%assign last_block 36
;-----------------------------------------------------------------------------
@@ -336,8 +345,63 @@ cglobal h264_idct_add8_10,5,8,7
INIT_XMM sse2
IDCT_ADD8
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD8
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_h264_idct_add8_422_10(pixel **dst, const int *block_offset,
+; int16_t *block, int stride,
+; const uint8_t nnzc[6*8])
+;-----------------------------------------------------------------------------
+%assign last_block 44
+
+%macro IDCT_ADD8_422 0
+
+cglobal h264_idct_add8_422_10, 5, 8, 7
+ movsxdifnidn r3, r3d
+%if ARCH_X86_64
+ mov r7, r0
+%endif
+
+ add r2, 1024
+ mov r0, [r0]
+ ADD16_OP_INTRA 16, 4+ 6*8
+ ADD16_OP_INTRA 18, 4+ 7*8
+ ADD16_OP_INTRA 24, 4+ 8*8 ; i+4
+ ADD16_OP_INTRA 26, 4+ 9*8 ; i+4
+ add r2, 1024-128*4
+
+%if ARCH_X86_64
+ mov r0, [r7+gprsize]
+%else
+ mov r0, r0m
+ mov r0, [r0+gprsize]
+%endif
+
+ ADD16_OP_INTRA 32, 4+11*8
+ ADD16_OP_INTRA 34, 4+12*8
+ ADD16_OP_INTRA 40, 4+13*8 ; i+4
+ ADD16_OP_INTRA 42, 4+14*8 ; i+4
+REP_RET
+ AC 16
+ AC 18
+ AC 24 ; i+4
+ AC 26 ; i+4
+ AC 32
+ AC 34
+ AC 40 ; i+4
+ AC 42 ; i+4
+
+%endmacro
+
+INIT_XMM sse2
+IDCT_ADD8_422
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+IDCT_ADD8_422
+%endif
;-----------------------------------------------------------------------------
; void ff_h264_idct8_add_10(pixel *dst, int16_t *block, int stride)
@@ -544,8 +608,10 @@ h264_idct8_add1_10 %+ SUFFIX:
INIT_XMM sse2
IDCT8_ADD
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT8_ADD
+%endif
;-----------------------------------------------------------------------------
; void ff_h264_idct8_add4_10(pixel **dst, const int *block_offset,
@@ -585,5 +651,7 @@ cglobal h264_idct8_add4_10, 0,7,16
INIT_XMM sse2
IDCT8_ADD4
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT8_ADD4
+%endif
OpenPOWER on IntegriCloud