diff options
Diffstat (limited to 'libavcodec/arm/h264idct_neon.S')
-rw-r--r-- | libavcodec/arm/h264idct_neon.S | 37 |
1 files changed, 27 insertions, 10 deletions
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S index b23ddb1..fa5b90c 100644 --- a/libavcodec/arm/h264idct_neon.S +++ b/libavcodec/arm/h264idct_neon.S @@ -1,20 +1,20 @@ /* * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -22,9 +22,12 @@ function ff_h264_idct_add_neon, export=1 vld1.64 {d0-d3}, [r1,:128] + vmov.i16 q15, #0 vswp d1, d2 + vst1.16 {q15}, [r1,:128]! vadd.i16 d4, d0, d1 + vst1.16 {q15}, [r1,:128]! vshr.s16 q8, q1, #1 vsub.i16 d5, d0, d1 vadd.i16 d6, d2, d17 @@ -65,11 +68,14 @@ function ff_h264_idct_add_neon, export=1 vst1.32 {d0[1]}, [r0,:32], r2 vst1.32 {d1[0]}, [r0,:32], r2 + sub r1, r1, #32 bx lr endfunc function ff_h264_idct_dc_add_neon, export=1 + mov r3, #0 vld1.16 {d2[],d3[]}, [r1,:16] + strh r3, [r1] vrshr.s16 q1, q1, #6 vld1.32 {d0[0]}, [r0,:32], r2 vld1.32 {d0[1]}, [r0,:32], r2 @@ -148,7 +154,7 @@ function ff_h264_idct_add8_neon, export=1 add r5, r1, #16*4 add r1, r2, #16*32 mov r2, r3 - mov r3, r1 + mov r10, r1 ldr r6, [sp, #32] movrel r7, scan8+16 mov r12, #0 @@ -156,7 +162,7 @@ function ff_h264_idct_add8_neon, export=1 ldr r0, [r5, r12, lsl #2] ldrb r8, [r6, r8] add r0, r0, r4 - add r1, r3, r12, lsl #5 + add r1, r10, r12, lsl #5 cmp r8, #0 ldrsh r8, [r1] iteet ne @@ -180,7 +186,9 @@ endfunc qb .req q14 vshr.s16 q2, q10, #1 vadd.i16 q0, q8, q12 - vld1.16 {q14-q15},[r1,:128]! + vld1.16 {q14-q15},[r1,:128] + vst1.16 {q7}, [r1,:128]! + vst1.16 {q7}, [r1,:128]! vsub.i16 q1, q8, q12 vshr.s16 q3, q14, #1 vsub.i16 q2, q2, q14 @@ -259,9 +267,16 @@ endfunc .endm function ff_h264_idct8_add_neon, export=1 - vld1.16 {q8-q9}, [r1,:128]! - vld1.16 {q10-q11},[r1,:128]! - vld1.16 {q12-q13},[r1,:128]! + vmov.i16 q7, #0 + vld1.16 {q8-q9}, [r1,:128] + vst1.16 {q7}, [r1,:128]! + vst1.16 {q7}, [r1,:128]! + vld1.16 {q10-q11},[r1,:128] + vst1.16 {q7}, [r1,:128]! + vst1.16 {q7}, [r1,:128]! + vld1.16 {q12-q13},[r1,:128] + vst1.16 {q7}, [r1,:128]! + vst1.16 {q7}, [r1,:128]! idct8x8_cols 0 idct8x8_cols 1 @@ -313,7 +328,9 @@ function ff_h264_idct8_add_neon, export=1 endfunc function ff_h264_idct8_dc_add_neon, export=1 + mov r3, #0 vld1.16 {d30[],d31[]},[r1,:16] + strh r3, [r1] vld1.32 {d0}, [r0,:64], r2 vrshr.s16 q15, q15, #6 vld1.32 {d1}, [r0,:64], r2 |