summaryrefslogtreecommitdiffstats
path: root/libavcodec/x86
diff options
context:
space:
mode:
authorChristophe Gisquet <christophe.gisquet@gmail.com>2015-02-01 15:13:45 -0300
committerJames Almer <jamrial@gmail.com>2015-02-01 20:22:54 -0300
commitbff7feb328d8d3fd234f920cb45e0ebdbdd7b407 (patch)
tree2c26a443bdb0d5bf21f815a632c48e205c73b66e /libavcodec/x86
parentfa3eccb4f9f3ecc9e2bb3c5924c2aa343b808076 (diff)
downloadffmpeg-streaming-bff7feb328d8d3fd234f920cb45e0ebdbdd7b407.zip
ffmpeg-streaming-bff7feb328d8d3fd234f920cb45e0ebdbdd7b407.tar.gz
x86: hevc/sao: aligned source buffers
Useful for at least the band filter, for which: - Band filter call only: 32 64 Before: 16556 54015 After: 16497 52355 - Whole case: 32 64 Before: 37031 103008 After: 32045 93952
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/hevc_sao.asm28
1 files changed, 14 insertions, 14 deletions
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 7f36fd0..4c11730 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -104,26 +104,26 @@ align 16
%assign i 0
%rep %2
- movu m13, [srcq + i]
+ mova m13, [srcq + i]
punpcklbw m8, m13, m14
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8
punpckhbw m13, m14
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
packuswb m8, m13
- movu [dstq + i], m8
+ mova [dstq + i], m8
%assign i i+mmsize
%endrep
%if %1 == 48
INIT_XMM cpuname
- movu m13, [srcq + i]
+ mova m13, [srcq + i]
punpcklbw m8, m13, m14
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8
punpckhbw m13, m14
HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
packuswb m8, m13
- movu [dstq + i], m8
+ mova [dstq + i], m8
%assign i i+16
%endif ; %1 == 48
@@ -143,37 +143,37 @@ cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, dst, src, dststride, srcstride, of
align 16
.loop
%if %2 == 8
- movu m8, [srcq]
+ mova m8, [srcq]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
CLIPW m8, m14, m13
- movu [dstq], m8
+ mova [dstq], m8
%endif
%assign i 0
%rep %3
- movu m8, [srcq + i]
+ mova m8, [srcq + i]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
CLIPW m8, m14, m13
- movu [dstq + i], m8
+ mova [dstq + i], m8
- movu m9, [srcq + i + mmsize]
+ mova m9, [srcq + i + mmsize]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
CLIPW m9, m14, m13
- movu [dstq + i + mmsize], m9
+ mova [dstq + i + mmsize], m9
%assign i i+mmsize*2
%endrep
%if %2 == 48
INIT_XMM cpuname
- movu m8, [srcq + i]
+ mova m8, [srcq + i]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
CLIPW m8, m14, m13
- movu [dstq + i], m8
+ mova [dstq + i], m8
- movu m9, [srcq + i + mmsize]
+ mova m9, [srcq + i + mmsize]
HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
CLIPW m9, m14, m13
- movu [dstq + i + mmsize], m9
+ mova [dstq + i + mmsize], m9
%assign i i+32
%endif ; %1 == 48
OpenPOWER on IntegriCloud