summary | refs | log | tree | commit | diff | stats
path: root/libavutil
diff options
context:
space:
mode:
author: Justin Ruggles <justin.ruggles@gmail.com> 2012-06-08 13:49:56 -0400
committer: Justin Ruggles <justin.ruggles@gmail.com> 2012-06-18 18:01:14 -0400
commit: cb5042d02c66aed68643633446f6bf623b72416e (patch)
tree: c8e75354a37c981aa3a488d7c236750a38d9b1d9 /libavutil
parent: 4e4dd7173023502b5b3e7c3d7ccd7e6fe45b7afe (diff)
download: ffmpeg-streaming-cb5042d02c66aed68643633446f6bf623b72416e.zip
download: ffmpeg-streaming-cb5042d02c66aed68643633446f6bf623b72416e.tar.gz
float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil
Diffstat (limited to 'libavutil')
-rw-r--r-- libavutil/arm/float_dsp_init_neon.c | 4
-rw-r--r-- libavutil/arm/float_dsp_neon.S | 48
-rw-r--r-- libavutil/float_dsp.c | 9
-rw-r--r-- libavutil/float_dsp.h | 16
4 files changed, 77 insertions, 0 deletions
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c
index fa6d0d7..3ca0288 100644
--- a/libavutil/arm/float_dsp_init_neon.c
+++ b/libavutil/arm/float_dsp_init_neon.c
@@ -26,7 +26,11 @@
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
+void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
+ int len);
+
/* Point the function pointers in fdsp at the NEON routines declared above. */
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
{
fdsp->vector_fmul = ff_vector_fmul_neon;
+ fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; /* scalar multiply-accumulate entry */
}
diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S
index d66fa09..03b1643 100644
--- a/libavutil/arm/float_dsp_neon.S
+++ b/libavutil/arm/float_dsp_neon.S
@@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1
3: vst1.32 {d16-d19},[r0,:128]!
bx lr
endfunc
+
+function ff_vector_fmac_scalar_neon, export=1 @ dst[i] += src[i] * mul; C prototype is (dst, src, mul, len)
+VFP len .req r2 @ VFP variant: len is read from r2 (mul comes from d0[0] below)
+VFP acc .req r3 @ VFP variant: r3 serves as the dst read pointer
+NOVFP len .req r3 @ NOVFP variant: len is read from r3 (mul comes from r2 below)
+NOVFP acc .req r2 @ NOVFP variant: r2 is reused as the dst read pointer once mul is copied out
+VFP vdup.32 q15, d0[0] @ q15 = mul replicated in all four lanes
+NOVFP vdup.32 q15, r2 @ same broadcast; has to come before the mov below, which overwrites r2
+ bics r12, len, #15 @ r12 = len & ~15 (elements in whole 16-float blocks); Z set when there are none
+ mov acc, r0 @ dst is read through acc and written through r0; plain mov keeps the flags from bics
+ beq 3f @ no whole block: go straight to the 4-float loop
+ vld1.32 {q0}, [r1,:128]! @ prime the pipeline: first src quad (:128 = 128-bit aligned address)
+ vld1.32 {q8}, [acc,:128]! @ first dst quad
+ vld1.32 {q1}, [r1,:128]! @ second src quad
+ vld1.32 {q9}, [acc,:128]! @ second dst quad
+1: vmla.f32 q8, q0, q15 @ main loop, 16 floats per pass: q8 += q0 * mul
+ vld1.32 {q2}, [r1,:128]! @ third src quad
+ vld1.32 {q10}, [acc,:128]! @ third dst quad
+ vmla.f32 q9, q1, q15 @ q9 += q1 * mul
+ vld1.32 {q3}, [r1,:128]! @ fourth src quad
+ vld1.32 {q11}, [acc,:128]! @ fourth dst quad
+ vmla.f32 q10, q2, q15 @ q10 += q2 * mul
+ vst1.32 {q8}, [r0,:128]! @ store first result quad
+ vmla.f32 q11, q3, q15 @ q11 += q3 * mul
+ vst1.32 {q9}, [r0,:128]! @ store second result quad
+ subs r12, r12, #16 @ one 16-float block consumed
+ beq 2f @ that was the last whole block: skip the preloads and drain
+ vld1.32 {q0}, [r1,:128]! @ preload the next block, interleaved with the two pending stores
+ vld1.32 {q8}, [acc,:128]!
+ vst1.32 {q10}, [r0,:128]! @ store third result quad
+ vld1.32 {q1}, [r1,:128]!
+ vld1.32 {q9}, [acc,:128]!
+ vst1.32 {q11}, [r0,:128]! @ store fourth result quad
+ b 1b
+2: vst1.32 {q10}, [r0,:128]! @ drain: third and fourth result quads of the final block
+ vst1.32 {q11}, [r0,:128]!
+ ands len, len, #15 @ len = elements left over after the whole blocks
+ it eq @ IT prefix for the conditional return that follows (needed in Thumb-2 builds)
+ bxeq lr @ nothing left over: return
+3: vld1.32 {q0}, [r1,:128]! @ tail loop, 4 floats per pass; body runs once before len is tested
+ vld1.32 {q8}, [acc,:128]!
+ vmla.f32 q8, q0, q15 @ q8 += q0 * mul
+ vst1.32 {q8}, [r0,:128]!
+ subs len, len, #4 @ four more elements done
+ bgt 3b @ loop while elements remain
+ bx lr
+ .unreq len @ drops the len alias; NOTE(review): the acc alias is not released here
+endfunc
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 039dd07..2e90939 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1,
dst[i] = src0[i] * src1[i];
}
+static void vector_fmac_scalar_c(float *dst, const float *src, float mul, /* plain C version: dst[i] += src[i] * mul */
+ int len) /* len: number of floats to process */
+{
+ int i;
+ for (i = 0; i < len; i++) /* body never runs when len <= 0 */
+ dst[i] += src[i] * mul; /* accumulates into dst; src is only read */
+}
+
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
{
fdsp->vector_fmul = vector_fmul_c;
+ fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
#if ARCH_ARM
ff_float_dsp_init_arm(fdsp);
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 30161a2..4e26630 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext {
*/
void (*vector_fmul)(float *dst, const float *src0, const float *src1,
int len);
+
+ /**
+ * Multiply a vector of floats by a scalar float and add to
+ * destination vector. Source and destination vectors must
+ * overlap exactly or not at all.
+ *
+ * @param dst result vector
+ * constraints: 16-byte aligned
+ * @param src input vector
+ * constraints: 16-byte aligned
+ * @param mul scalar value
+ * @param len length of vector
+ * constraints: multiple of 4
+ */
+ void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
+ int len);
} AVFloatDSPContext;
/**
OpenPOWER on IntegriCloud