diff options
Diffstat (limited to 'libavcodec/x86/vorbisdsp.asm')
-rw-r--r-- | libavcodec/x86/vorbisdsp.asm | 27 |
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
index c54650e..d952296 100644
--- a/libavcodec/x86/vorbisdsp.asm
+++ b/libavcodec/x86/vorbisdsp.asm
@@ -2,20 +2,20 @@
 ;* Vorbis x86 optimizations
 ;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
@@ -57,13 +57,17 @@ cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
 %endif
 
 INIT_XMM sse
-cglobal vorbis_inverse_coupling, 3, 4, 6, mag, ang, block_size, cntr
+cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
     mova m5, [pdw_80000000]
-    xor cntrq, cntrq
+    shl block_sized, 2
+    add magq, block_sizeq
+    add angq, block_sizeq
+    neg block_sizeq
+
 align 16
 .loop:
-    mova m0, [magq+cntrq*4]
-    mova m1, [angq+cntrq*4]
+    mova m0, [magq+block_sizeq]
+    mova m1, [angq+block_sizeq]
     xorps m2, m2
     xorps m3, m3
     cmpleps m2, m0     ; m <= 0.0
@@ -75,9 +79,8 @@ align 16
     andnps m4, m1
     addps m3, m0       ; a = m + ((a < 0) & (a ^ sign(m)))
     subps m0, m4       ; m = m + ((a > 0) & (a ^ sign(m)))
-    mova [angq+cntrq*4], m3
-    mova [magq+cntrq*4], m0
-    add cntrq, 4
-    cmp cntrq, block_sizeq
+    mova [angq+block_sizeq], m3
+    mova [magq+block_sizeq], m0
+    add block_sizeq, mmsize
     jl .loop
     RET