1 files changed, 27 insertions, 10 deletions
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
index b23ddb1..fa5b90c 100644
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -22,9 +22,12 @@
 
 function ff_h264_idct_add_neon, export=1
         vld1.64         {d0-d3},  [r1,:128]
+        vmov.i16        q15, #0
 
         vswp            d1,  d2
+        vst1.16         {q15},    [r1,:128]!
         vadd.i16        d4,  d0,  d1
+        vst1.16         {q15},    [r1,:128]!
         vshr.s16        q8,  q1,  #1
         vsub.i16        d5,  d0,  d1
         vadd.i16        d6,  d2,  d17
@@ -65,11 +68,14 @@ function ff_h264_idct_add_neon, export=1
         vst1.32         {d0[1]},  [r0,:32], r2
         vst1.32         {d1[0]},  [r0,:32], r2
 
+        sub             r1,  r1,  #32
         bx              lr
 endfunc
 
 function ff_h264_idct_dc_add_neon, export=1
+        mov             r3,       #0
         vld1.16         {d2[],d3[]}, [r1,:16]
+        strh            r3,       [r1]
         vrshr.s16       q1,  q1,  #6
         vld1.32         {d0[0]},  [r0,:32], r2
         vld1.32         {d0[1]},  [r0,:32], r2
@@ -148,7 +154,7 @@ function ff_h264_idct_add8_neon, export=1
         add             r5,  r1,  #16*4
         add             r1,  r2,  #16*32
         mov             r2,  r3
-        mov             r3,  r1
+        mov             r10, r1
         ldr             r6,  [sp, #32]
         movrel          r7,  scan8+16
         mov             r12, #0
@@ -156,7 +162,7 @@ function ff_h264_idct_add8_neon, export=1
         ldr             r0,  [r5, r12, lsl #2]
         ldrb            r8,  [r6, r8]
         add             r0,  r0,  r4
-        add             r1,  r3,  r12, lsl #5
+        add             r1,  r10, r12, lsl #5
         cmp             r8,  #0
         ldrsh           r8,  [r1]
         iteet           ne
@@ -180,7 +186,9 @@ endfunc
         qb      .req    q14
         vshr.s16        q2,  q10, #1
         vadd.i16        q0,  q8,  q12
-        vld1.16         {q14-q15},[r1,:128]!
+        vld1.16         {q14-q15},[r1,:128]
+        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q7},     [r1,:128]!
         vsub.i16        q1,  q8,  q12
         vshr.s16        q3,  q14, #1
         vsub.i16        q2,  q2,  q14
@@ -259,9 +267,16 @@ endfunc
 .endm
 
 function ff_h264_idct8_add_neon, export=1
-        vld1.16         {q8-q9},  [r1,:128]!
-        vld1.16         {q10-q11},[r1,:128]!
-        vld1.16         {q12-q13},[r1,:128]!
+        vmov.i16        q7,       #0
+        vld1.16         {q8-q9},  [r1,:128]
+        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q7},     [r1,:128]!
+        vld1.16         {q10-q11},[r1,:128]
+        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q7},     [r1,:128]!
+        vld1.16         {q12-q13},[r1,:128]
+        vst1.16         {q7},     [r1,:128]!
+        vst1.16         {q7},     [r1,:128]!
 
         idct8x8_cols    0
         idct8x8_cols    1
@@ -313,7 +328,9 @@ function ff_h264_idct8_add_neon, export=1
 endfunc
 
 function ff_h264_idct8_dc_add_neon, export=1
+        mov             r3,       #0
         vld1.16         {d30[],d31[]},[r1,:16]
+        strh            r3,       [r1]
         vld1.32         {d0},     [r0,:64], r2
         vrshr.s16       q15, q15, #6
         vld1.32         {d1},     [r0,:64], r2