summaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/hevc_mc.asm
diff options
context:
space:
mode:
authorMickaël Raulet <mraulet@insa-rennes.fr>2014-07-25 18:55:23 +0200
committerMichael Niedermayer <michaelni@gmx.at>2014-07-26 01:55:20 +0200
commitbd0f2d316faebfa1052b27421824c3f8dcbd78a8 (patch)
tree571ba47e386361481715ad54d3c61b558f4dd963 /libavcodec/x86/hevc_mc.asm
parent7df98d8c4d970e830619d19eb74687e7a3dfceb8 (diff)
downloadffmpeg-streaming-bd0f2d316faebfa1052b27421824c3f8dcbd78a8.zip
ffmpeg-streaming-bd0f2d316faebfa1052b27421824c3f8dcbd78a8.tar.gz
x86/hevc: add 12bits support for MC
cherry picked from commit 3fcb7a4595a6f40100a22110a5805e3b7510c0fd Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/hevc_mc.asm')
-rw-r--r--libavcodec/x86/hevc_mc.asm63
1 files changed, 58 insertions, 5 deletions
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 5cf37d0..81b7d9e 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -21,11 +21,14 @@
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
-pw_8: times 8 dw 512
-pw_10: times 8 dw 2048
-pw_bi_8: times 8 dw 256
-pw_bi_10: times 8 dw 1024
-max_pixels_10: times 8 dw 1023
+pw_8: times 8 dw (1 << 9) ; eight 16-bit words, each 512 (same value as the literal it replaces)
+pw_10: times 8 dw (1 << 11) ; eight 16-bit words, each 2048 (same value as the literal it replaces)
+pw_12: times 8 dw (1 << 13) ; eight 16-bit words, each 8192; new 12-bit entry following the 8/10-bit pattern
+pw_bi_8: times 8 dw (1 << 8) ; eight 16-bit words, each 256 (same value as the literal it replaces)
+pw_bi_10: times 8 dw (1 << 10) ; eight 16-bit words, each 1024 (same value as the literal it replaces)
+pw_bi_12: times 8 dw (1 << 12) ; eight 16-bit words, each 4096; new 12-bit entry following the 8/10-bit pattern
+max_pixels_10: times 8 dw ((1 << 10)-1) ; eight 16-bit words, each 1023 = largest 10-bit value
+max_pixels_12: times 8 dw ((1 << 12)-1) ; eight 16-bit words, each 4095 = largest 12-bit value; NOTE(review): presumably a clamp ceiling - confirm at use sites
zero: times 4 dd 0
one_per_32: times 4 dd 1
@@ -51,6 +54,7 @@ hevc_epel_filters_%4_%1 times %2 d%3 -2, 58
EPEL_TABLE 8, 8, b, sse4
EPEL_TABLE 10, 4, w, sse4
+EPEL_TABLE 12, 4, w, sse4
%macro QPEL_TABLE 4
hevc_qpel_filters_%4_%1 times %2 d%3 -1, 4
@@ -69,6 +73,7 @@ hevc_qpel_filters_%4_%1 times %2 d%3 -1, 4
QPEL_TABLE 8, 8, b, sse4
QPEL_TABLE 10, 4, w, sse4
+QPEL_TABLE 12, 4, w, sse4
%define hevc_qpel_filters_sse4_14 hevc_qpel_filters_sse4_10
@@ -295,6 +300,29 @@ QPEL_TABLE 10, 4, w, sse4
%endif
%endmacro
+%macro PEL_12STORE2 3 ; %1 = destination address, %2 = source register, %3 = accepted but unused
+ movd [%1], %2 ; write the low 32 bits (4 bytes) of %2; presumably two 16-bit samples - confirm with callers
+%endmacro
+%macro PEL_12STORE4 3 ; %1 = destination address, %2 = source register, %3 = accepted but unused
+ movq [%1], %2 ; write the low 64 bits (8 bytes) of %2; presumably four 16-bit samples - confirm with callers
+%endmacro
+%macro PEL_12STORE6 3 ; %1 = destination address, %2 = source register (modified), %3 = accepted but unused
+ movq [%1], %2 ; write the low 8 bytes of %2 to [%1]
+ psrldq %2, 8 ; shift %2 right by 8 bytes so its upper half becomes the low half; %2 no longer holds its input value
+ movd [%1+8], %2 ; write the next 4 bytes at offset 8, for 12 bytes stored in total
+%endmacro
+%macro PEL_12STORE8 3 ; %1 = destination address, %2 = source register, %3 = accepted but unused
+ movdqa [%1], %2 ; aligned 16-byte store of all of %2; %1 must be 16-byte aligned or the store faults
+%endmacro
+%macro PEL_12STORE12 3 ; %1 = destination address, %2 = first source register, %3 = second source register
+ movdqa [%1], %2 ; aligned 16-byte store of %2; %1 must be 16-byte aligned
+ movq [%1+16], %3 ; write the low 8 bytes of %3 at offset 16, for 24 bytes stored in total
+%endmacro
+%macro PEL_12STORE16 3 ; %1 = destination address, %2 = first source register, %3 = second source register
+ PEL_12STORE8 %1, %2, %3 ; reuse the 8-wide macro for the first 16 bytes (it stores %2 and ignores %3)
+ movdqa [%1+16], %3 ; aligned 16-byte store of %3 at offset 16, for 32 bytes stored in total
+%endmacro
+
%macro PEL_10STORE2 3
movd [%1], %2
%endmacro
@@ -1229,6 +1257,11 @@ WEIGHTING_FUNCS 4, 10
WEIGHTING_FUNCS 6, 10
WEIGHTING_FUNCS 8, 10
+WEIGHTING_FUNCS 2, 12
+WEIGHTING_FUNCS 4, 12
+WEIGHTING_FUNCS 6, 12
+WEIGHTING_FUNCS 8, 12
+
HEVC_PUT_HEVC_PEL_PIXELS 2, 8
HEVC_PUT_HEVC_PEL_PIXELS 4, 8
HEVC_PUT_HEVC_PEL_PIXELS 6, 8
@@ -1241,6 +1274,10 @@ HEVC_PUT_HEVC_PEL_PIXELS 4, 10
HEVC_PUT_HEVC_PEL_PIXELS 6, 10
HEVC_PUT_HEVC_PEL_PIXELS 8, 10
+HEVC_PUT_HEVC_PEL_PIXELS 2, 12
+HEVC_PUT_HEVC_PEL_PIXELS 4, 12
+HEVC_PUT_HEVC_PEL_PIXELS 6, 12
+HEVC_PUT_HEVC_PEL_PIXELS 8, 12
HEVC_PUT_HEVC_EPEL 2, 8
HEVC_PUT_HEVC_EPEL 4, 8
@@ -1255,6 +1292,10 @@ HEVC_PUT_HEVC_EPEL 4, 10
HEVC_PUT_HEVC_EPEL 6, 10
HEVC_PUT_HEVC_EPEL 8, 10
+HEVC_PUT_HEVC_EPEL 2, 12
+HEVC_PUT_HEVC_EPEL 4, 12
+HEVC_PUT_HEVC_EPEL 6, 12
+HEVC_PUT_HEVC_EPEL 8, 12
HEVC_PUT_HEVC_EPEL_HV 2, 8
HEVC_PUT_HEVC_EPEL_HV 4, 8
@@ -1266,6 +1307,10 @@ HEVC_PUT_HEVC_EPEL_HV 4, 10
HEVC_PUT_HEVC_EPEL_HV 6, 10
HEVC_PUT_HEVC_EPEL_HV 8, 10
+HEVC_PUT_HEVC_EPEL_HV 2, 12
+HEVC_PUT_HEVC_EPEL_HV 4, 12
+HEVC_PUT_HEVC_EPEL_HV 6, 12
+HEVC_PUT_HEVC_EPEL_HV 8, 12
HEVC_PUT_HEVC_QPEL 4, 8
HEVC_PUT_HEVC_QPEL 8, 8
@@ -1275,6 +1320,9 @@ HEVC_PUT_HEVC_QPEL 16, 8
HEVC_PUT_HEVC_QPEL 4, 10
HEVC_PUT_HEVC_QPEL 8, 10
+HEVC_PUT_HEVC_QPEL 4, 12
+HEVC_PUT_HEVC_QPEL 8, 12
+
HEVC_PUT_HEVC_QPEL_HV 2, 8
HEVC_PUT_HEVC_QPEL_HV 4, 8
HEVC_PUT_HEVC_QPEL_HV 6, 8
@@ -1285,4 +1333,9 @@ HEVC_PUT_HEVC_QPEL_HV 4, 10
HEVC_PUT_HEVC_QPEL_HV 6, 10
HEVC_PUT_HEVC_QPEL_HV 8, 10
+HEVC_PUT_HEVC_QPEL_HV 2, 12
+HEVC_PUT_HEVC_QPEL_HV 4, 12
+HEVC_PUT_HEVC_QPEL_HV 6, 12
+HEVC_PUT_HEVC_QPEL_HV 8, 12
+
%endif ; ARCH_X86_64
OpenPOWER on IntegriCloud