diff options
author | Mickaël Raulet <mraulet@insa-rennes.fr> | 2014-07-25 18:55:23 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-07-26 01:55:20 +0200 |
commit | bd0f2d316faebfa1052b27421824c3f8dcbd78a8 (patch) | |
tree | 571ba47e386361481715ad54d3c61b558f4dd963 /libavcodec/x86/hevc_mc.asm | |
parent | 7df98d8c4d970e830619d19eb74687e7a3dfceb8 (diff) | |
download | ffmpeg-streaming-bd0f2d316faebfa1052b27421824c3f8dcbd78a8.zip ffmpeg-streaming-bd0f2d316faebfa1052b27421824c3f8dcbd78a8.tar.gz |
x86/hevc: add 12bits support for MC
cherry picked from commit 3fcb7a4595a6f40100a22110a5805e3b7510c0fd
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/hevc_mc.asm')
-rw-r--r-- | libavcodec/x86/hevc_mc.asm | 63 |
1 files changed, 58 insertions, 5 deletions
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm index 5cf37d0..81b7d9e 100644 --- a/libavcodec/x86/hevc_mc.asm +++ b/libavcodec/x86/hevc_mc.asm @@ -21,11 +21,14 @@ %include "libavutil/x86/x86util.asm" SECTION_RODATA -pw_8: times 8 dw 512 -pw_10: times 8 dw 2048 -pw_bi_8: times 8 dw 256 -pw_bi_10: times 8 dw 1024 -max_pixels_10: times 8 dw 1023 +pw_8: times 8 dw (1 << 9) +pw_10: times 8 dw (1 << 11) +pw_12: times 8 dw (1 << 13) +pw_bi_8: times 8 dw (1 << 8) +pw_bi_10: times 8 dw (1 << 10) +pw_bi_12: times 8 dw (1 << 12) +max_pixels_10: times 8 dw ((1 << 10)-1) +max_pixels_12: times 8 dw ((1 << 12)-1) zero: times 4 dd 0 one_per_32: times 4 dd 1 @@ -51,6 +54,7 @@ hevc_epel_filters_%4_%1 times %2 d%3 -2, 58 EPEL_TABLE 8, 8, b, sse4 EPEL_TABLE 10, 4, w, sse4 +EPEL_TABLE 12, 4, w, sse4 %macro QPEL_TABLE 4 hevc_qpel_filters_%4_%1 times %2 d%3 -1, 4 @@ -69,6 +73,7 @@ hevc_qpel_filters_%4_%1 times %2 d%3 -1, 4 QPEL_TABLE 8, 8, b, sse4 QPEL_TABLE 10, 4, w, sse4 +QPEL_TABLE 12, 4, w, sse4 %define hevc_qpel_filters_sse4_14 hevc_qpel_filters_sse4_10 @@ -295,6 +300,29 @@ QPEL_TABLE 10, 4, w, sse4 %endif %endmacro +%macro PEL_12STORE2 3 + movd [%1], %2 +%endmacro +%macro PEL_12STORE4 3 + movq [%1], %2 +%endmacro +%macro PEL_12STORE6 3 + movq [%1], %2 + psrldq %2, 8 + movd [%1+8], %2 +%endmacro +%macro PEL_12STORE8 3 + movdqa [%1], %2 +%endmacro +%macro PEL_12STORE12 3 + movdqa [%1], %2 + movq [%1+16], %3 +%endmacro +%macro PEL_12STORE16 3 + PEL_12STORE8 %1, %2, %3 + movdqa [%1+16], %3 +%endmacro + %macro PEL_10STORE2 3 movd [%1], %2 %endmacro @@ -1229,6 +1257,11 @@ WEIGHTING_FUNCS 4, 10 WEIGHTING_FUNCS 6, 10 WEIGHTING_FUNCS 8, 10 +WEIGHTING_FUNCS 2, 12 +WEIGHTING_FUNCS 4, 12 +WEIGHTING_FUNCS 6, 12 +WEIGHTING_FUNCS 8, 12 + HEVC_PUT_HEVC_PEL_PIXELS 2, 8 HEVC_PUT_HEVC_PEL_PIXELS 4, 8 HEVC_PUT_HEVC_PEL_PIXELS 6, 8 @@ -1241,6 +1274,10 @@ HEVC_PUT_HEVC_PEL_PIXELS 4, 10 HEVC_PUT_HEVC_PEL_PIXELS 6, 10 HEVC_PUT_HEVC_PEL_PIXELS 8, 10 +HEVC_PUT_HEVC_PEL_PIXELS 2, 12 
+HEVC_PUT_HEVC_PEL_PIXELS 4, 12 +HEVC_PUT_HEVC_PEL_PIXELS 6, 12 +HEVC_PUT_HEVC_PEL_PIXELS 8, 12 HEVC_PUT_HEVC_EPEL 2, 8 HEVC_PUT_HEVC_EPEL 4, 8 @@ -1255,6 +1292,10 @@ HEVC_PUT_HEVC_EPEL 4, 10 HEVC_PUT_HEVC_EPEL 6, 10 HEVC_PUT_HEVC_EPEL 8, 10 +HEVC_PUT_HEVC_EPEL 2, 12 +HEVC_PUT_HEVC_EPEL 4, 12 +HEVC_PUT_HEVC_EPEL 6, 12 +HEVC_PUT_HEVC_EPEL 8, 12 HEVC_PUT_HEVC_EPEL_HV 2, 8 HEVC_PUT_HEVC_EPEL_HV 4, 8 @@ -1266,6 +1307,10 @@ HEVC_PUT_HEVC_EPEL_HV 4, 10 HEVC_PUT_HEVC_EPEL_HV 6, 10 HEVC_PUT_HEVC_EPEL_HV 8, 10 +HEVC_PUT_HEVC_EPEL_HV 2, 12 +HEVC_PUT_HEVC_EPEL_HV 4, 12 +HEVC_PUT_HEVC_EPEL_HV 6, 12 +HEVC_PUT_HEVC_EPEL_HV 8, 12 HEVC_PUT_HEVC_QPEL 4, 8 HEVC_PUT_HEVC_QPEL 8, 8 @@ -1275,6 +1320,9 @@ HEVC_PUT_HEVC_QPEL 16, 8 HEVC_PUT_HEVC_QPEL 4, 10 HEVC_PUT_HEVC_QPEL 8, 10 +HEVC_PUT_HEVC_QPEL 4, 12 +HEVC_PUT_HEVC_QPEL 8, 12 + HEVC_PUT_HEVC_QPEL_HV 2, 8 HEVC_PUT_HEVC_QPEL_HV 4, 8 HEVC_PUT_HEVC_QPEL_HV 6, 8 @@ -1285,4 +1333,9 @@ HEVC_PUT_HEVC_QPEL_HV 4, 10 HEVC_PUT_HEVC_QPEL_HV 6, 10 HEVC_PUT_HEVC_QPEL_HV 8, 10 +HEVC_PUT_HEVC_QPEL_HV 2, 12 +HEVC_PUT_HEVC_QPEL_HV 4, 12 +HEVC_PUT_HEVC_QPEL_HV 6, 12 +HEVC_PUT_HEVC_QPEL_HV 8, 12 + %endif ; ARCH_X86_64 |