summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Clément Bœsch <u@pkh.me>, 2017-03-20 22:28:38 +0100
committer: Clément Bœsch <u@pkh.me>, 2017-03-20 22:35:07 +0100
commit: 83cd80d10aebd1bde7310ab3d058134d0642a6bb (patch)
tree: ce0aa14de18b3c8745924d2fa88a2927c85d6c3d
parent: bbc3bde14f1402a68c64a28edc347464554589cb (diff)
parent: 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5 (diff)
download: ffmpeg-streaming-83cd80d10aebd1bde7310ab3d058134d0642a6bb.zip
download: ffmpeg-streaming-83cd80d10aebd1bde7310ab3d058134d0642a6bb.tar.gz
Merge commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5'
* commit '12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5':
  audiodsp/x86: yasmify vector_clipf_sse
  audiodsp: reorder arguments for vector_clipf

Merged the version from Libav after a discussion with James Almer on IRC:

19:22 <ubitux> jamrial: opinion on 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5?
19:23 <ubitux> it was apparently yasmified differently
19:23 <ubitux> (it depends on the previous commit arg shuffle)
19:24 <ubitux> i don't see the magic movsxdifnidn in your port btw
19:24 <ubitux> it's a port from 1d36defe94c7d7ebf995d4dbb4f878d06272f9c6
19:25 <jamrial> seems better thanks to said arg shuffle
19:25 <jamrial> the loop is the same, but init is simpler
19:25 <jamrial> probably worth merging
19:25 <ubitux> OK
19:25 <ubitux> thanks
19:26 <jamrial> curious they didn't make len ptrdiff_t after the previous bunch of commits, heh
19:26 <ubitux> yeah indeed

Both commits are merged at the same time to prevent a conflict with our existing yasmified ff_vector_clipf_sse.

Merged-by: Clément Bœsch <u@pkh.me>
-rw-r--r-- libavcodec/ac3enc_float.c | 2
-rw-r--r-- libavcodec/arm/audiodsp_init_neon.c | 3
-rw-r--r-- libavcodec/arm/audiodsp_neon.S | 5
-rw-r--r-- libavcodec/audiodsp.c | 4
-rw-r--r-- libavcodec/audiodsp.h | 3
-rw-r--r-- libavcodec/cook.c | 2
-rw-r--r-- libavcodec/x86/audiodsp.asm | 81
-rw-r--r-- libavcodec/x86/audiodsp_init.c | 2
-rw-r--r-- tests/checkasm/audiodsp.c | 8
9 files changed, 54 insertions, 56 deletions
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 6c91f45..caa210d 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -121,7 +121,7 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
static void clip_coefficients(AudioDSPContext *adsp, float *coef,
unsigned int len)
{
- adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
+ adsp->vector_clipf(coef, coef, len, COEF_MIN, COEF_MAX);
}
diff --git a/libavcodec/arm/audiodsp_init_neon.c b/libavcodec/arm/audiodsp_init_neon.c
index f7bd162..6902db8 100644
--- a/libavcodec/arm/audiodsp_init_neon.c
+++ b/libavcodec/arm/audiodsp_init_neon.c
@@ -25,8 +25,7 @@
#include "libavcodec/audiodsp.h"
#include "audiodsp_arm.h"
-void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
- int len);
+void ff_vector_clipf_neon(float *dst, const float *src, int len, float min, float max);
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len);
diff --git a/libavcodec/arm/audiodsp_neon.S b/libavcodec/arm/audiodsp_neon.S
index ab32cef..cea700c 100644
--- a/libavcodec/arm/audiodsp_neon.S
+++ b/libavcodec/arm/audiodsp_neon.S
@@ -24,9 +24,8 @@
function ff_vector_clipf_neon, export=1
VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0]
-NOVFP vdup.32 q0, r2
-NOVFP vdup.32 q1, r3
-NOVFP ldr r2, [sp]
+NOVFP vdup.32 q0, r3
+NOVFP vld1.32 {d2[],d3[]}, [sp]
vld1.f32 {q2},[r1,:128]!
vmin.f32 q10, q2, q1
vld1.f32 {q3},[r1,:128]!
diff --git a/libavcodec/audiodsp.c b/libavcodec/audiodsp.c
index 85b5a74..3c7a3a7 100644
--- a/libavcodec/audiodsp.c
+++ b/libavcodec/audiodsp.c
@@ -55,8 +55,8 @@ static void vector_clipf_c_opposite_sign(float *dst, const float *src,
}
}
-static void vector_clipf_c(float *dst, const float *src,
- float min, float max, int len)
+static void vector_clipf_c(float *dst, const float *src, int len,
+ float min, float max)
{
int i;
diff --git a/libavcodec/audiodsp.h b/libavcodec/audiodsp.h
index 003a1d0..aa6fa78 100644
--- a/libavcodec/audiodsp.h
+++ b/libavcodec/audiodsp.h
@@ -48,7 +48,8 @@ typedef struct AudioDSPContext {
/* assume len is a multiple of 16, and arrays are 16-byte aligned */
void (*vector_clipf)(float *dst /* align 16 */,
const float *src /* align 16 */,
- float min, float max, int len /* align 16 */);
+ int len /* align 16 */,
+ float min, float max);
} AudioDSPContext;
void ff_audiodsp_init(AudioDSPContext *c);
diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index 4488f8e..53cb8385 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -882,7 +882,7 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p,
static void saturate_output_float(COOKContext *q, float *out)
{
q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
- -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8));
+ FFALIGN(q->samples_per_channel, 8), -1.0f, 1.0f);
}
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 3eeb6fd..8ef2a8c 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -132,46 +132,45 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
VECTOR_CLIP_INT32 6, 1, 0, 0
%endif
-;-----------------------------------------------------
-;void ff_vector_clipf(float *dst, const float *src,
-; float min, float max, int len)
-;-----------------------------------------------------
+; void ff_vector_clipf_sse(float *dst, const float *src,
+; int len, float min, float max)
INIT_XMM sse
-%if UNIX64
-cglobal vector_clipf, 3,3,6, dst, src, len
-%else
-cglobal vector_clipf, 5,5,6, dst, src, min, max, len
-%endif
-%if WIN64
- SWAP 0, 2
- SWAP 1, 3
-%elif ARCH_X86_32
- movss m0, minm
- movss m1, maxm
+cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
+%if ARCH_X86_32
+ VBROADCASTSS m0, minm
+ VBROADCASTSS m1, maxm
+%elif WIN64
+ VBROADCASTSS m0, m3
+ VBROADCASTSS m1, maxm
+%else ; 64bit sysv
+ VBROADCASTSS m0, m0
+ VBROADCASTSS m1, m1
%endif
- SPLATD m0
- SPLATD m1
- shl lend, 2
- add srcq, lenq
- add dstq, lenq
- neg lenq
-.loop:
- mova m2, [srcq+lenq+mmsize*0]
- mova m3, [srcq+lenq+mmsize*1]
- mova m4, [srcq+lenq+mmsize*2]
- mova m5, [srcq+lenq+mmsize*3]
- maxps m2, m0
- maxps m3, m0
- maxps m4, m0
- maxps m5, m0
- minps m2, m1
- minps m3, m1
- minps m4, m1
- minps m5, m1
- mova [dstq+lenq+mmsize*0], m2
- mova [dstq+lenq+mmsize*1], m3
- mova [dstq+lenq+mmsize*2], m4
- mova [dstq+lenq+mmsize*3], m5
- add lenq, mmsize*4
- jl .loop
- REP_RET
+
+ movsxdifnidn lenq, lend
+
+.loop
+ mova m2, [srcq + 4 * lenq - 4 * mmsize]
+ mova m3, [srcq + 4 * lenq - 3 * mmsize]
+ mova m4, [srcq + 4 * lenq - 2 * mmsize]
+ mova m5, [srcq + 4 * lenq - 1 * mmsize]
+
+ maxps m2, m0
+ maxps m3, m0
+ maxps m4, m0
+ maxps m5, m0
+
+ minps m2, m1
+ minps m3, m1
+ minps m4, m1
+ minps m5, m1
+
+ mova [dstq + 4 * lenq - 4 * mmsize], m2
+ mova [dstq + 4 * lenq - 3 * mmsize], m3
+ mova [dstq + 4 * lenq - 2 * mmsize], m4
+ mova [dstq + 4 * lenq - 1 * mmsize], m5
+
+ sub lenq, mmsize
+ jg .loop
+
+ RET
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index 8f9e604..98e296c 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -38,7 +38,7 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len);
void ff_vector_clipf_sse(float *dst, const float *src,
- float min, float max, int len);
+ int len, float min, float max);
av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
{
diff --git a/tests/checkasm/audiodsp.c b/tests/checkasm/audiodsp.c
index 1da1d1e..7c4e16e 100644
--- a/tests/checkasm/audiodsp.c
+++ b/tests/checkasm/audiodsp.c
@@ -120,7 +120,7 @@ void checkasm_check_audiodsp(void)
int i, len;
declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src,
- float min, float max, unsigned int len);
+ int len, float min, float max);
val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
@@ -133,13 +133,13 @@ void checkasm_check_audiodsp(void)
len = rnd() % 128;
len = 16 * FFMAX(len, 1);
- call_ref(dst0, src, min, max, len);
- call_new(dst1, src, min, max, len);
+ call_ref(dst0, src, len, min, max);
+ call_new(dst1, src, len, min, max);
for (i = 0; i < len; i++) {
if (!float_near_ulp_array(dst0, dst1, 3, len))
fail();
}
- bench_new(dst1, src, min, max, MAX_SIZE);
+ bench_new(dst1, src, MAX_SIZE, min, max);
}
report("audiodsp");
OpenPOWER on IntegriCloud