diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2015-02-01 15:13:45 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2015-02-01 20:22:54 -0300 |
commit | bff7feb328d8d3fd234f920cb45e0ebdbdd7b407 (patch) | |
tree | 2c26a443bdb0d5bf21f815a632c48e205c73b66e /libavcodec/x86 | |
parent | fa3eccb4f9f3ecc9e2bb3c5924c2aa343b808076 (diff) | |
download | ffmpeg-streaming-bff7feb328d8d3fd234f920cb45e0ebdbdd7b407.zip ffmpeg-streaming-bff7feb328d8d3fd234f920cb45e0ebdbdd7b407.tar.gz |
x86: hevc/sao: aligned source buffers
Useful for at least the band filter, for which:
- Band filter call only:
32 64
Before: 16556 54015
After: 16497 52355
- Whole case:
32 64
Before: 37031 103008
After: 32045 93952
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/hevc_sao.asm | 28 |
1 files changed, 14 insertions, 14 deletions
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm index 7f36fd0..4c11730 100644 --- a/libavcodec/x86/hevc_sao.asm +++ b/libavcodec/x86/hevc_sao.asm @@ -104,26 +104,26 @@ align 16 %assign i 0 %rep %2 - movu m13, [srcq + i] + mova m13, [srcq + i] punpcklbw m8, m13, m14 HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8 punpckhbw m13, m14 HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13 packuswb m8, m13 - movu [dstq + i], m8 + mova [dstq + i], m8 %assign i i+mmsize %endrep %if %1 == 48 INIT_XMM cpuname - movu m13, [srcq + i] + mova m13, [srcq + i] punpcklbw m8, m13, m14 HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8 punpckhbw m13, m14 HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13 packuswb m8, m13 - movu [dstq + i], m8 + mova [dstq + i], m8 %assign i i+16 %endif ; %1 == 48 @@ -143,37 +143,37 @@ cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, dst, src, dststride, srcstride, of align 16 .loop %if %2 == 8 - movu m8, [srcq] + mova m8, [srcq] HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8 CLIPW m8, m14, m13 - movu [dstq], m8 + mova [dstq], m8 %endif %assign i 0 %rep %3 - movu m8, [srcq + i] + mova m8, [srcq + i] HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8 CLIPW m8, m14, m13 - movu [dstq + i], m8 + mova [dstq + i], m8 - movu m9, [srcq + i + mmsize] + mova m9, [srcq + i + mmsize] HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9 CLIPW m9, m14, m13 - movu [dstq + i + mmsize], m9 + mova [dstq + i + mmsize], m9 %assign i i+mmsize*2 %endrep %if %2 == 48 INIT_XMM cpuname - movu m8, [srcq + i] + mova m8, [srcq + i] HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8 CLIPW m8, m14, m13 - movu [dstq + i], m8 + mova [dstq + i], m8 - movu m9, [srcq + i + mmsize] + mova m9, [srcq + i + mmsize] HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9 CLIPW m9, m14, m13 - movu [dstq + i + mmsize], m9 + mova [dstq + i + mmsize], m9 %assign i i+32 %endif ; %1 == 48 |