Diffstat (limited to 'libavcodec/alpha/me_cmp_mvi_asm.S')
-rw-r--r--  libavcodec/alpha/me_cmp_mvi_asm.S | 179
1 file changed, 179 insertions, 0 deletions
diff --git a/libavcodec/alpha/me_cmp_mvi_asm.S b/libavcodec/alpha/me_cmp_mvi_asm.S
new file mode 100644
index 0000000..2399085
--- /dev/null
+++ b/libavcodec/alpha/me_cmp_mvi_asm.S
@@ -0,0 +1,179 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "regdef.h"
+
+/* Some nicer register names. */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
+        .set noat
+        .set noreorder
+        .arch pca56
+        .text
+
+/*****************************************************************************
+ * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
+ *
+ * This code is written with a pca56 in mind. For ev6, one should
+ * really take the increased latency of 3 cycles for MVI instructions
+ * into account.
+ *
+ * It is important to keep the loading and first use of a register as
+ * far apart as possible, because if a register is accessed before it
+ * has been fetched from memory, the CPU will stall.
+ */
+        .align 4
+        .globl pix_abs16x16_mvi_asm
+        .ent pix_abs16x16_mvi_asm
+pix_abs16x16_mvi_asm:
+        .frame sp, 0, ra, 0
+        .prologue 0
+
+        and     a2, 7, t0
+        clr     v0
+        beq     t0, $aligned
+        .align 4
+$unaligned:
+        /* Registers:
+           line 0:
+           t0:  left_u -> left lo -> left
+           t1:  mid
+           t2:  right_u -> right hi -> right
+           t3:  ref left
+           t4:  ref right
+           line 1:
+           t5:  left_u -> left lo -> left
+           t6:  mid
+           t7:  right_u -> right hi -> right
+           t8:  ref left
+           t9:  ref right
+           temp:
+           ta:  left hi
+           tb:  right lo
+           tc:  error left
+           td:  error right  */
+
+        /* load line 0 */
+        ldq_u   t0,  0(a2)      # left_u
+        ldq_u   t1,  8(a2)      # mid
+        ldq_u   t2, 16(a2)      # right_u
+        ldq     t3,  0(a1)      # ref left
+        ldq     t4,  8(a1)      # ref right
+        addq    a1, a3, a1      # pix1
+        addq    a2, a3, a2      # pix2
+        /* load line 1 */
+        ldq_u   t5,  0(a2)      # left_u
+        ldq_u   t6,  8(a2)      # mid
+        ldq_u   t7, 16(a2)      # right_u
+        ldq     t8,  0(a1)      # ref left
+        ldq     t9,  8(a1)      # ref right
+        addq    a1, a3, a1      # pix1
+        addq    a2, a3, a2      # pix2
+        /* calc line 0 */
+        extql   t0, a2, t0      # left lo
+        extqh   t1, a2, ta      # left hi
+        extql   t1, a2, tb      # right lo
+        or      t0, ta, t0      # left
+        extqh   t2, a2, t2      # right hi
+        perr    t3, t0, tc      # error left
+        or      t2, tb, t2      # right
+        perr    t4, t2, td      # error right
+        addq    v0, tc, v0      # add error left
+        addq    v0, td, v0      # add error right
+        /* calc line 1 */
+        extql   t5, a2, t5      # left lo
+        extqh   t6, a2, ta      # left hi
+        extql   t6, a2, tb      # right lo
+        or      t5, ta, t5      # left
+        extqh   t7, a2, t7      # right hi
+        perr    t8, t5, tc      # error left
+        or      t7, tb, t7      # right
+        perr    t9, t7, td      # error right
+        addq    v0, tc, v0      # add error left
+        addq    v0, td, v0      # add error right
+        /* loop */
+        subq    a4, 2, a4       # h -= 2
+        bne     a4, $unaligned
+        ret
+
+        .align 4
+$aligned:
+        /* load line 0 */
+        ldq     t0, 0(a2)       # left
+        ldq     t1, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     t2, 0(a1)       # ref left
+        ldq     t3, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        /* load line 1 */
+        ldq     t4, 0(a2)       # left
+        ldq     t5, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     t6, 0(a1)       # ref left
+        ldq     t7, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        /* load line 2 */
+        ldq     t8, 0(a2)       # left
+        ldq     t9, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     ta, 0(a1)       # ref left
+        ldq     tb, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        /* load line 3 */
+        ldq     tc, 0(a2)       # left
+        ldq     td, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     te, 0(a1)       # ref left
+        ldq     a0, 8(a1)       # ref right
+        /* calc line 0 */
+        perr    t0, t2, t0      # error left
+        addq    a1, a3, a1      # pix1
+        perr    t1, t3, t1      # error right
+        addq    v0, t0, v0      # add error left
+        /* calc line 1 */
+        perr    t4, t6, t0      # error left
+        addq    v0, t1, v0      # add error right
+        perr    t5, t7, t1      # error right
+        addq    v0, t0, v0      # add error left
+        /* calc line 2 */
+        perr    t8, ta, t0      # error left
+        addq    v0, t1, v0      # add error right
+        perr    t9, tb, t1      # error right
+        addq    v0, t0, v0      # add error left
+        /* calc line 3 */
+        perr    tc, te, t0      # error left
+        addq    v0, t1, v0      # add error right
+        perr    td, a0, t1      # error right
+        addq    v0, t0, v0      # add error left
+        addq    v0, t1, v0      # add error right
+        /* loop */
+        subq    a4, 4, a4       # h -= 4
+        bne     a4, $aligned
+        ret
+        .end pix_abs16x16_mvi_asm
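
For context: perr (pixel error, an MVI instruction) sums the absolute differences of the eight byte lanes of its two source quadwords, so the two perr ops per line above cover all 16 pixels of a row. A minimal C sketch of what the routine computes as a whole, assuming the prototype given in the file's header comment (the name pix_abs16_ref is illustrative, not from the FFmpeg tree):

    #include <stdint.h>
    #include <stdlib.h>

    /* Reference model of pix_abs16x16_mvi_asm: a 16-pixel-wide sum of
     * absolute differences over h rows.  Each perr in the assembly
     * evaluates the inner loop for 8 bytes at once. */
    static int pix_abs16_ref(const uint8_t *pix1, const uint8_t *pix2,
                             int line_size, int h)
    {
        int sum = 0;
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < 16; x++)
                sum += abs(pix1[x] - pix2[x]);
            pix1 += line_size;
            pix2 += line_size;
        }
        return sum;
    }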
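The $unaligned loop builds each misaligned 16-byte row from three aligned ldq_u loads, then shifts and merges them with extql/extqh. The same idiom sketched in C under little-endian Alpha semantics (load_unaligned_u64 is a hypothetical helper name):

    #include <stdint.h>

    /* ldq_u/extql/extqh idiom: assemble an unaligned 64-bit load from
     * the two aligned quadwords straddling the address.  extql shifts
     * the low quadword right by 8*(addr & 7) bits; extqh shifts the
     * high quadword left by the complementary amount; OR merges them. */
    static uint64_t load_unaligned_u64(const uint8_t *p)
    {
        uintptr_t addr = (uintptr_t)p;
        const uint64_t *lo = (const uint64_t *)(addr & ~(uintptr_t)7); /* ldq_u 0(p) */
        const uint64_t *hi = lo + 1;                                   /* ldq_u 8(p) */
        unsigned shift = (unsigned)(addr & 7) * 8;

        if (shift == 0)     /* already aligned: extqh would contribute 0 */
            return *lo;
        return (*lo >> shift) | (*hi << (64 - shift));
    }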