diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2009-12-05 17:53:11 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2009-12-05 17:53:11 +0000 |
commit | a4605efdf51f1a900d0a6404c5a938f4443416bf (patch) | |
tree | 2c68f665a4125a200460fb230a9ba25ad368c157 /libavcodec | |
parent | 91e644ff77ef70fc4a8fa4766d24afa281fe0d3b (diff) | |
download | ffmpeg-streaming-a4605efdf51f1a900d0a6404c5a938f4443416bf.zip ffmpeg-streaming-a4605efdf51f1a900d0a6404c5a938f4443416bf.tar.gz |
slightly faster scalarproduct_and_madd_int16_ssse3 on penryn, no change on conroe
Originally committed as revision 20743 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/dsputil_yasm.asm | 18 |
1 file changed, 13 insertions, 5 deletions
```diff
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 96080be..6bc1934 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -202,12 +202,20 @@ align 16
     mova m2, [v3q + orderq]
     mova m3, [v3q + orderq + mmsize]
 %endif
-    pmaddwd m0, [v1q + orderq]
-    pmaddwd m1, [v1q + orderq + mmsize]
+    %define t0 [v1q + orderq]
+    %define t1 [v1q + orderq + mmsize]
+%ifdef ARCH_X86_64
+    mova m8, t0
+    mova m9, t1
+    %define t0 m8
+    %define t1 m9
+%endif
+    pmaddwd m0, t0
+    pmaddwd m1, t1
     pmullw m2, m7
     pmullw m3, m7
-    paddw m2, [v1q + orderq]
-    paddw m3, [v1q + orderq + mmsize]
+    paddw m2, t0
+    paddw m3, t1
     paddd m6, m0
     paddd m6, m1
     mova [v1q + orderq], m2
@@ -219,7 +227,7 @@ align 16
 %endmacro
 
 ; int scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
-cglobal scalarproduct_and_madd_int16_ssse3, 4,5,8, v1, v2, v3, order, mul
+cglobal scalarproduct_and_madd_int16_ssse3, 4,5,10, v1, v2, v3, order, mul
     shl orderq, 1
     movd m7, mulm
     pshuflw m7, m7, 0
```