From cb5042d02c66aed68643633446f6bf623b72416e Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Fri, 8 Jun 2012 13:49:56 -0400
Subject: float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil

---
 libavutil/arm/float_dsp_init_neon.c |  4 ++++
 libavutil/arm/float_dsp_neon.S      | 48 +++++++++++++++++++++++++++++++++++++
 libavutil/float_dsp.c               |  9 +++++++
 libavutil/float_dsp.h               | 16 +++++++++++++
 4 files changed, 77 insertions(+)

(limited to 'libavutil')

diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c
index fa6d0d7..3ca0288 100644
--- a/libavutil/arm/float_dsp_init_neon.c
+++ b/libavutil/arm/float_dsp_init_neon.c
@@ -26,7 +26,11 @@
 
 void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
 
+void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
+                                int len);
+
 void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
 {
     fdsp->vector_fmul = ff_vector_fmul_neon;
+    fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
 }
diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S
index d66fa09..03b1643 100644
--- a/libavutil/arm/float_dsp_neon.S
+++ b/libavutil/arm/float_dsp_neon.S
@@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1
 3:      vst1.32         {d16-d19},[r0,:128]!
         bx              lr
 endfunc
+
+function ff_vector_fmac_scalar_neon, export=1
+VFP     len .req r2
+VFP     acc .req r3
+NOVFP   len .req r3
+NOVFP   acc .req r2
+VFP     vdup.32         q15, d0[0]
+NOVFP   vdup.32         q15, r2
+        bics            r12, len, #15
+        mov             acc, r0
+        beq             3f
+        vld1.32         {q0},     [r1,:128]!
+        vld1.32         {q8},     [acc,:128]!
+        vld1.32         {q1},     [r1,:128]!
+        vld1.32         {q9},     [acc,:128]!
+1:      vmla.f32        q8,  q0,  q15
+        vld1.32         {q2},     [r1,:128]!
+        vld1.32         {q10},    [acc,:128]!
+        vmla.f32        q9,  q1,  q15
+        vld1.32         {q3},     [r1,:128]!
+        vld1.32         {q11},    [acc,:128]!
+        vmla.f32        q10, q2,  q15
+        vst1.32         {q8},     [r0,:128]!
+        vmla.f32        q11, q3,  q15
+        vst1.32         {q9},     [r0,:128]!
+        subs            r12, r12, #16
+        beq             2f
+        vld1.32         {q0},     [r1,:128]!
+        vld1.32         {q8},     [acc,:128]!
+        vst1.32         {q10},    [r0,:128]!
+        vld1.32         {q1},     [r1,:128]!
+        vld1.32         {q9},     [acc,:128]!
+        vst1.32         {q11},    [r0,:128]!
+        b               1b
+2:      vst1.32         {q10},    [r0,:128]!
+        vst1.32         {q11},    [r0,:128]!
+        ands            len, len, #15
+        it              eq
+        bxeq            lr
+3:      vld1.32         {q0},     [r1,:128]!
+        vld1.32         {q8},     [acc,:128]!
+        vmla.f32        q8,  q0,  q15
+        vst1.32         {q8},     [r0,:128]!
+        subs            len, len, #4
+        bgt             3b
+        bx              lr
+        .unreq          len
+endfunc
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 039dd07..2e90939 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1,
         dst[i] = src0[i] * src1[i];
 }
 
+static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
+                                 int len)
+{
+    int i;
+    for (i = 0; i < len; i++)
+        dst[i] += src[i] * mul;
+}
+
 void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
 {
     fdsp->vector_fmul = vector_fmul_c;
+    fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
 
 #if ARCH_ARM
     ff_float_dsp_init_arm(fdsp);
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 30161a2..4e26630 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext {
      */
     void (*vector_fmul)(float *dst, const float *src0, const float *src1,
                         int len);
+
+    /**
+     * Multiply a vector of floats by a scalar float and add to
+     * destination vector.  Source and destination vectors must
+     * overlap exactly or not at all.
+     *
+     * @param dst result vector
+     *            constraints: 16-byte aligned
+     * @param src input vector
+     *            constraints: 16-byte aligned
+     * @param mul scalar value
+     * @param len length of vector
+     *            constraints: multiple of 4
+     */
+    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
+                               int len);
 } AVFloatDSPContext;
 
 /**
-- 
cgit v1.1