diff options
Diffstat (limited to 'libavcodec/x86/h264_deblock_10bit.asm')
-rw-r--r-- | libavcodec/x86/h264_deblock_10bit.asm | 77 |
1 files changed, 38 insertions, 39 deletions
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index caf2701..d625eee 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -151,11 +151,11 @@ cextern pw_4 %endif %endmacro -%macro DEBLOCK_LUMA 1 +%macro DEBLOCK_LUMA 0 ;----------------------------------------------------------------------------- ; void deblock_v_luma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16) +cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) %assign pad 5*mmsize+12-(stack_offset&15) %define tcm [rsp] %define ms1 [rsp+mmsize] @@ -210,7 +210,7 @@ cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16) ADD rsp, pad RET -cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16) +cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) %assign pad 7*mmsize+12-(stack_offset&15) %define tcm [rsp] %define ms1 [rsp+mmsize] @@ -301,7 +301,6 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16) RET %endmacro -INIT_XMM %if ARCH_X86_64 ; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2 ; m12=alpha, m13=beta @@ -339,8 +338,8 @@ INIT_XMM SWAP 3, 9 %endmacro -%macro DEBLOCK_LUMA_64 1 -cglobal deblock_v_luma_10_%1, 5,5,15 +%macro DEBLOCK_LUMA_64 0 +cglobal deblock_v_luma_10, 5,5,15 %define p2 m8 %define p1 m0 %define p0 m1 @@ -377,7 +376,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15 jg .loop REP_RET -cglobal deblock_h_luma_10_%1, 5,7,15 +cglobal deblock_h_luma_10, 5,7,15 shl r2d, 2 shl r3d, 2 LOAD_AB m12, m13, r2d, r3d @@ -417,11 +416,11 @@ cglobal deblock_h_luma_10_%1, 5,7,15 REP_RET %endmacro -INIT_XMM -DEBLOCK_LUMA_64 sse2 +INIT_XMM sse2 +DEBLOCK_LUMA_64 %if HAVE_AVX -INIT_AVX -DEBLOCK_LUMA_64 avx +INIT_XMM avx +DEBLOCK_LUMA_64 %endif %endif @@ -604,8 +603,8 @@ DEBLOCK_LUMA_64 avx ;----------------------------------------------------------------------------- ; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -%macro DEBLOCK_LUMA_INTRA_64 1 -cglobal deblock_v_luma_intra_10_%1, 4,7,16 +%macro DEBLOCK_LUMA_INTRA_64 0 +cglobal deblock_v_luma_intra_10, 4,7,16 %define t0 m1 %define t1 m2 %define t2 m4 @@ -655,7 +654,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,16 ;----------------------------------------------------------------------------- ; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_10_%1, 4,7,16 +cglobal deblock_h_luma_intra_10, 4,7,16 %define t0 m15 %define t1 m14 %define t2 m2 @@ -714,20 +713,20 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,16 RET %endmacro -INIT_XMM -DEBLOCK_LUMA_INTRA_64 sse2 +INIT_XMM sse2 +DEBLOCK_LUMA_INTRA_64 %if HAVE_AVX -INIT_AVX -DEBLOCK_LUMA_INTRA_64 avx +INIT_XMM avx +DEBLOCK_LUMA_INTRA_64 %endif %endif -%macro DEBLOCK_LUMA_INTRA 1 +%macro DEBLOCK_LUMA_INTRA 0 ;----------------------------------------------------------------------------- ; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16) +cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16) LUMA_INTRA_INIT 3 lea r4, [r1*4] lea r5, [r1*3] @@ -755,7 +754,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16) ;----------------------------------------------------------------------------- ; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16) +cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16) LUMA_INTRA_INIT 8 %if mmsize == 8 lea r4, [r1*3] @@ -797,16 +796,16 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16) %endmacro %if ARCH_X86_64 == 0 -INIT_MMX -DEBLOCK_LUMA mmxext -DEBLOCK_LUMA_INTRA mmxext -INIT_XMM -DEBLOCK_LUMA sse2 -DEBLOCK_LUMA_INTRA sse2 +INIT_MMX mmx2 +DEBLOCK_LUMA +DEBLOCK_LUMA_INTRA +INIT_XMM sse2 +DEBLOCK_LUMA +DEBLOCK_LUMA_INTRA %if HAVE_AVX -INIT_AVX -DEBLOCK_LUMA avx -DEBLOCK_LUMA_INTRA avx +INIT_XMM avx +DEBLOCK_LUMA +DEBLOCK_LUMA_INTRA %endif %endif @@ -849,11 +848,11 @@ DEBLOCK_LUMA_INTRA avx psraw %1, 6 %endmacro -%macro DEBLOCK_CHROMA 1 +%macro DEBLOCK_CHROMA 0 ;----------------------------------------------------------------------------- ; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- -cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16) +cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) mov r5, r0 sub r0, r1 sub r0, r1 @@ -887,7 +886,7 @@ cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16) ;----------------------------------------------------------------------------- ; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16) +cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16) mov r4, r0 sub r0, r1 sub r0, r1 @@ -914,12 +913,12 @@ cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16) %endmacro %if ARCH_X86_64 == 0 -INIT_MMX -DEBLOCK_CHROMA mmxext +INIT_MMX mmx2 +DEBLOCK_CHROMA %endif -INIT_XMM -DEBLOCK_CHROMA sse2 +INIT_XMM sse2 +DEBLOCK_CHROMA %if HAVE_AVX -INIT_AVX -DEBLOCK_CHROMA avx +INIT_XMM avx +DEBLOCK_CHROMA %endif |