diff options
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 2 | ||||
-rw-r--r-- | libavcodec/x86/constants.c | 9 | ||||
-rw-r--r-- | libavcodec/x86/constants.h | 4 | ||||
-rw-r--r-- | libavcodec/x86/h264_qpel_10bit.asm | 2 | ||||
-rw-r--r-- | libavcodec/x86/hevc_mc.asm | 14 | ||||
-rw-r--r-- | libavcodec/x86/hevc_sao.asm | 2 | ||||
-rw-r--r-- | libavcodec/x86/huffyuvdsp.asm | 4 | ||||
-rw-r--r-- | libavcodec/x86/vp9intrapred.asm | 4 |
8 files changed, 26 insertions, 15 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index b244416..675ade3 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -32,7 +32,7 @@ pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768 pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 ; used in ff_ac3_extract_exponents() -pd_1: times 4 dd 1 +cextern pd_1 pd_151: times 4 dd 151 ; used in ff_apply_window_int16() diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c index a7cb75d..ddd009b 100644 --- a/libavcodec/x86/constants.c +++ b/libavcodec/x86/constants.c @@ -53,12 +53,19 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x200 DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL }; -DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL, + 0x0000000000000000ULL, 0x0000000000000000ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_2) = { 0x0202020202020202ULL, 0x0202020202020202ULL, + 0x0202020202020202ULL, 0x0202020202020202ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL, 0x0303030303030303ULL, 0x0303030303030303ULL }; +DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL }; DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL; DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL }; + +DECLARE_ALIGNED(32, const ymm_reg, ff_pd_1) = { 0x0000000100000001ULL, 0x0000000100000001ULL, + 0x0000000100000001ULL, 0x0000000100000001ULL }; diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h index 094dd42..0b3c874 100644 --- a/libavcodec/x86/constants.h +++ b/libavcodec/x86/constants.h @@ -49,7 +49,9 @@ extern const xmm_reg ff_pw_2048; extern const xmm_reg ff_pw_8192; extern const ymm_reg ff_pw_m1; +extern const ymm_reg ff_pb_0; extern const ymm_reg ff_pb_1; +extern const ymm_reg ff_pb_2; extern const ymm_reg ff_pb_3; extern const xmm_reg ff_pb_80; extern const xmm_reg ff_pb_F8; @@ -57,4 +59,6 @@ extern const uint64_t ff_pb_FC; extern const xmm_reg ff_ps_neg; +extern const ymm_reg ff_pd_1; + #endif /* AVCODEC_X86_CONSTANTS_H */ diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm index d65660d..e7ce1b8 100644 --- a/libavcodec/x86/h264_qpel_10bit.asm +++ b/libavcodec/x86/h264_qpel_10bit.asm @@ -28,7 +28,7 @@ SECTION_RODATA 32 cextern pw_16 cextern pw_1 -pb_0: times 32 db 0 ; we do not use cextern here as old llvm-gcc fails to align it correctly +cextern pb_0 pw_pixel_max: times 8 dw ((1 << 10)-1) diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm index 04d00ce..2b016f6 100644 --- a/libavcodec/x86/hevc_mc.asm +++ b/libavcodec/x86/hevc_mc.asm @@ -30,8 +30,8 @@ pw_bi_12: times 16 dw (1 << 12) max_pixels_8: times 16 dw ((1 << 8)-1) max_pixels_10: times 16 dw ((1 << 10)-1) max_pixels_12: times 16 dw ((1 << 12)-1) -zero: times 8 dd 0 -one_per_32: times 8 dd 1 +cextern pd_1 +cextern pb_0 SECTION_TEXT 32 %macro EPEL_TABLE 4 @@ -665,9 +665,9 @@ QPEL_TABLE 10, 8, w, avx2 %if %2 == 8 packuswb %3, %4 %else - CLIPW %3, [zero], [max_pixels_%2] + CLIPW %3, [pb_0], [max_pixels_%2] %if (%1 > 8 && notcpuflag(avx)) || %1 > 16 - CLIPW %4, [zero], [max_pixels_%2] + CLIPW %4, [pb_0], [max_pixels_%2] %endif %endif %endmacro @@ -1428,7 +1428,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh punpcklwd m2, m2 %endif dec SHIFT - movdqu m5, [one_per_32] + movdqu m5, [pd_1] movd m6, SHIFT pshufd m2, m2, 0 mov SHIFT, oxm @@ -1465,7 +1465,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh %if %2 == 8 packuswb m0, m0 %else - CLIPW m0, [zero], [max_pixels_%2] + CLIPW m0, [pb_0], [max_pixels_%2] %endif PEL_%2STORE%1 dstq, m0, m1 add dstq, dststrideq ; dst += dststride @@ -1536,7 +1536,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, dststride, src, srcstride, src2, %if %2 == 8 packuswb m0, m0 %else - CLIPW m0, [zero], [max_pixels_%2] + CLIPW m0, [pb_0], [max_pixels_%2] %endif PEL_%2STORE%1 dstq, m0, m1 add dstq, dststrideq ; dst += dststride diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm index f4eca0c..8202236 100644 --- a/libavcodec/x86/hevc_sao.asm +++ b/libavcodec/x86/hevc_sao.asm @@ -27,7 +27,6 @@ SECTION_RODATA 32 pw_mask10: times 16 dw 0x03FF pw_mask12: times 16 dw 0x0FFF -pb_2: times 32 db 2 pw_m2: times 16 dw -2 pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1 @@ -35,6 +34,7 @@ cextern pw_m1 cextern pw_1 cextern pw_2 cextern pb_1 +cextern pb_2 SECTION_TEXT diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm index cc48556..85ee56d 100644 --- a/libavcodec/x86/huffyuvdsp.asm +++ b/libavcodec/x86/huffyuvdsp.asm @@ -23,7 +23,7 @@ %include "libavutil/x86/x86util.asm" SECTION_RODATA -pb_f: times 16 db 15 +cextern pb_15 pb_zzzzzzzz77777777: times 8 db -1 pb_7: times 8 db 7 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 @@ -157,7 +157,7 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left INIT_XMM sse4 cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left - mova m5, [pb_f] + mova m5, [pb_15] mova m6, [pb_zzzzzzzz77777777] mova m4, [pb_zzzz3333zzzzbbbb] mova m3, [pb_zz11zz55zz99zzdd] diff --git a/libavcodec/x86/vp9intrapred.asm b/libavcodec/x86/vp9intrapred.asm index 169676f..31f7d44 100644 --- a/libavcodec/x86/vp9intrapred.asm +++ b/libavcodec/x86/vp9intrapred.asm @@ -64,8 +64,6 @@ pb_6xm1_BDF_0to6: times 6 db -1 db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6 pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 -pb_2: times 32 db 2 -pb_15: times 16 db 15 pb_15x0_1xm1: times 15 db 0 db -1 pb_0to2_5x3: db 0, 1, 2 @@ -76,7 +74,9 @@ pb_6x0_2xm1: times 6 db 0 times 2 db -1 cextern pb_1 +cextern pb_2 cextern pb_3 +cextern pb_15 cextern pw_2 cextern pw_4 cextern pw_8 |