summary | refs | log | tree | commit | diff | stats
path: root/libavcodec
diff options
context:
space:
mode:
author: Loren Merritt <lorenm@u.washington.edu> 2009-12-05 17:53:11 +0000
committer: Loren Merritt <lorenm@u.washington.edu> 2009-12-05 17:53:11 +0000
commit: a4605efdf51f1a900d0a6404c5a938f4443416bf (patch)
tree: 2c68f665a4125a200460fb230a9ba25ad368c157 /libavcodec
parent: 91e644ff77ef70fc4a8fa4766d24afa281fe0d3b (diff)
download: ffmpeg-streaming-a4605efdf51f1a900d0a6404c5a938f4443416bf.zip
download: ffmpeg-streaming-a4605efdf51f1a900d0a6404c5a938f4443416bf.tar.gz
slightly faster scalarproduct_and_madd_int16_ssse3 on penryn, no change on conroe
Originally committed as revision 20743 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/x86/dsputil_yasm.asm 18
1 file changed, 13 insertions, 5 deletions
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 96080be..6bc1934 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -202,12 +202,20 @@ align 16
mova m2, [v3q + orderq]
mova m3, [v3q + orderq + mmsize]
%endif
- pmaddwd m0, [v1q + orderq]
- pmaddwd m1, [v1q + orderq + mmsize]
+ %define t0 [v1q + orderq]
+ %define t1 [v1q + orderq + mmsize]
+%ifdef ARCH_X86_64
+ mova m8, t0
+ mova m9, t1
+ %define t0 m8
+ %define t1 m9
+%endif
+ pmaddwd m0, t0
+ pmaddwd m1, t1
pmullw m2, m7
pmullw m3, m7
- paddw m2, [v1q + orderq]
- paddw m3, [v1q + orderq + mmsize]
+ paddw m2, t0
+ paddw m3, t1
paddd m6, m0
paddd m6, m1
mova [v1q + orderq], m2
@@ -219,7 +227,7 @@ align 16
%endmacro
; int scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
-cglobal scalarproduct_and_madd_int16_ssse3, 4,5,8, v1, v2, v3, order, mul
+cglobal scalarproduct_and_madd_int16_ssse3, 4,5,10, v1, v2, v3, order, mul
shl orderq, 1
movd m7, mulm
pshuflw m7, m7, 0
OpenPOWER on IntegriCloud