summaryrefslogtreecommitdiffstats
path: root/libavcodec/x86
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/ac3dsp.asm2
-rw-r--r--libavcodec/x86/constants.c9
-rw-r--r--libavcodec/x86/constants.h4
-rw-r--r--libavcodec/x86/h264_qpel_10bit.asm2
-rw-r--r--libavcodec/x86/hevc_mc.asm14
-rw-r--r--libavcodec/x86/hevc_sao.asm2
-rw-r--r--libavcodec/x86/huffyuvdsp.asm4
-rw-r--r--libavcodec/x86/vp9intrapred.asm4
8 files changed, 26 insertions, 15 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index b244416..675ade3 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -32,7 +32,7 @@ pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
; used in ff_ac3_extract_exponents()
-pd_1: times 4 dd 1
+cextern pd_1
pd_151: times 4 dd 151
; used in ff_apply_window_int16()
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index a7cb75d..ddd009b 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -53,12 +53,19 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x200
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
-DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL,
+ 0x0000000000000000ULL, 0x0000000000000000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL,
0x0101010101010101ULL, 0x0101010101010101ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pb_2) = { 0x0202020202020202ULL, 0x0202020202020202ULL,
+ 0x0202020202020202ULL, 0x0202020202020202ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL,
0x0303030303030303ULL, 0x0303030303030303ULL };
+DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL };
+
+DECLARE_ALIGNED(32, const ymm_reg, ff_pd_1) = { 0x0000000100000001ULL, 0x0000000100000001ULL,
+ 0x0000000100000001ULL, 0x0000000100000001ULL };
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index 094dd42..0b3c874 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -49,7 +49,9 @@ extern const xmm_reg ff_pw_2048;
extern const xmm_reg ff_pw_8192;
extern const ymm_reg ff_pw_m1;
+extern const ymm_reg ff_pb_0;
extern const ymm_reg ff_pb_1;
+extern const ymm_reg ff_pb_2;
extern const ymm_reg ff_pb_3;
extern const xmm_reg ff_pb_80;
extern const xmm_reg ff_pb_F8;
@@ -57,4 +59,6 @@ extern const uint64_t ff_pb_FC;
extern const xmm_reg ff_ps_neg;
+extern const ymm_reg ff_pd_1;
+
#endif /* AVCODEC_X86_CONSTANTS_H */
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
index d65660d..e7ce1b8 100644
--- a/libavcodec/x86/h264_qpel_10bit.asm
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -28,7 +28,7 @@ SECTION_RODATA 32
cextern pw_16
cextern pw_1
-pb_0: times 32 db 0 ; we do not use cextern here as old llvm-gcc fails to align it correctly
+cextern pb_0
pw_pixel_max: times 8 dw ((1 << 10)-1)
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 04d00ce..2b016f6 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -30,8 +30,8 @@ pw_bi_12: times 16 dw (1 << 12)
max_pixels_8: times 16 dw ((1 << 8)-1)
max_pixels_10: times 16 dw ((1 << 10)-1)
max_pixels_12: times 16 dw ((1 << 12)-1)
-zero: times 8 dd 0
-one_per_32: times 8 dd 1
+cextern pd_1
+cextern pb_0
SECTION_TEXT 32
%macro EPEL_TABLE 4
@@ -665,9 +665,9 @@ QPEL_TABLE 10, 8, w, avx2
%if %2 == 8
packuswb %3, %4
%else
- CLIPW %3, [zero], [max_pixels_%2]
+ CLIPW %3, [pb_0], [max_pixels_%2]
%if (%1 > 8 && notcpuflag(avx)) || %1 > 16
- CLIPW %4, [zero], [max_pixels_%2]
+ CLIPW %4, [pb_0], [max_pixels_%2]
%endif
%endif
%endmacro
@@ -1428,7 +1428,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh
punpcklwd m2, m2
%endif
dec SHIFT
- movdqu m5, [one_per_32]
+ movdqu m5, [pd_1]
movd m6, SHIFT
pshufd m2, m2, 0
mov SHIFT, oxm
@@ -1465,7 +1465,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh
%if %2 == 8
packuswb m0, m0
%else
- CLIPW m0, [zero], [max_pixels_%2]
+ CLIPW m0, [pb_0], [max_pixels_%2]
%endif
PEL_%2STORE%1 dstq, m0, m1
add dstq, dststrideq ; dst += dststride
@@ -1536,7 +1536,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, dststride, src, srcstride, src2,
%if %2 == 8
packuswb m0, m0
%else
- CLIPW m0, [zero], [max_pixels_%2]
+ CLIPW m0, [pb_0], [max_pixels_%2]
%endif
PEL_%2STORE%1 dstq, m0, m1
add dstq, dststrideq ; dst += dststride
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index f4eca0c..8202236 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -27,7 +27,6 @@ SECTION_RODATA 32
pw_mask10: times 16 dw 0x03FF
pw_mask12: times 16 dw 0x0FFF
-pb_2: times 32 db 2
pw_m2: times 16 dw -2
pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
@@ -35,6 +34,7 @@ cextern pw_m1
cextern pw_1
cextern pw_2
cextern pb_1
+cextern pb_2
SECTION_TEXT
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index cc48556..85ee56d 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -23,7 +23,7 @@
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
-pb_f: times 16 db 15
+cextern pb_15
pb_zzzzzzzz77777777: times 8 db -1
pb_7: times 8 db 7
pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11
@@ -157,7 +157,7 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
INIT_XMM sse4
cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
- mova m5, [pb_f]
+ mova m5, [pb_15]
mova m6, [pb_zzzzzzzz77777777]
mova m4, [pb_zzzz3333zzzzbbbb]
mova m3, [pb_zz11zz55zz99zzdd]
diff --git a/libavcodec/x86/vp9intrapred.asm b/libavcodec/x86/vp9intrapred.asm
index 169676f..31f7d44 100644
--- a/libavcodec/x86/vp9intrapred.asm
+++ b/libavcodec/x86/vp9intrapred.asm
@@ -64,8 +64,6 @@ pb_6xm1_BDF_0to6: times 6 db -1
db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6
pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
-pb_2: times 32 db 2
-pb_15: times 16 db 15
pb_15x0_1xm1: times 15 db 0
db -1
pb_0to2_5x3: db 0, 1, 2
@@ -76,7 +74,9 @@ pb_6x0_2xm1: times 6 db 0
times 2 db -1
cextern pb_1
+cextern pb_2
cextern pb_3
+cextern pb_15
cextern pw_2
cextern pw_4
cextern pw_8
OpenPOWER on IntegriCloud