summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- libswscale/x86/input.asm 20
1 files changed, 19 insertions, 1 deletions
diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
index 1f0f1ef..af9afca 100644
--- a/libswscale/x86/input.asm
+++ b/libswscale/x86/input.asm
@@ -347,8 +347,9 @@ cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
%if ARCH_X86_64
movsxd wq, wd
%endif
- lea srcq, [srcq+wq*4]
add wq, wq
+ sub wq, mmsize - 1
+ lea srcq, [srcq+wq*2]
add dstq, wq
neg wq
mova m4, [rgb_Yrnd]
@@ -373,6 +374,23 @@ cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
mova [dstq+wq], m0
add wq, mmsize
jl .loop
+ sub wq, mmsize - 1
+ jz .end
+ add srcq, 2*mmsize - 2
+ add dstq, mmsize - 1
+.loop2:
+ movd m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
+ DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7]
+ pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3]
+ pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3]
+ paddd m0, m4 ; += rgb_Yrnd
+ paddd m0, m1 ; (dword) { Y[0-3] }
+ psrad m0, 9
+ packssdw m0, m0 ; (word) { Y[0-7] }
+ movd [dstq+wq], m0
+ add wq, 2
+ jl .loop2
+.end:
REP_RET
%endif ; %0 == 3
%endmacro
OpenPOWER on IntegriCloud