summary | refs | log | tree | commit | diff | stats
path: root/libswresample/x86/audio_convert_init.c
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2014-11-06 20:43:06 -0300
committer: James Almer <jamrial@gmail.com> 2014-11-06 20:54:00 -0300
commit: b385c4c6a316f798b6a14418f09e545cda7fef7f (patch)
tree: 2fb3571e48727956078258fe0da9325ff70c5583 /libswresample/x86/audio_convert_init.c
parent: 93ab6693d8cf6e01ba415dde89860e07abed9e0b (diff)
download: ffmpeg-streaming-b385c4c6a316f798b6a14418f09e545cda7fef7f.zip
download: ffmpeg-streaming-b385c4c6a316f798b6a14418f09e545cda7fef7f.tar.gz
x86/swr: replace sse4 instructions in pack_6ch with sse ones
There's no benefit from using blendps here except on CPUs with AVX, where it's faster than shufps according to Intel's documentation. As such, rename the sse4 functions to sse/sse2 and use shufps instead.

Reviewed-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libswresample/x86/audio_convert_init.c')
-rw-r--r-- libswresample/x86/audio_convert_init.c | 23
1 files changed, 12 insertions, 11 deletions
diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c
index a26cdf6..769575d 100644
--- a/libswresample/x86/audio_convert_init.c
+++ b/libswresample/x86/audio_convert_init.c
@@ -58,7 +58,12 @@ MULTI_CAPS_FUNC(SSE2, sse2)
ac->simd_f = ff_pack_6ch_float_to_float_a_mmx;
}
}
-
+ if(EXTERNAL_SSE(mm_flags)) {
+ if(channels == 6) {
+ if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+ ac->simd_f = ff_pack_6ch_float_to_float_a_sse;
+ }
+ }
if(EXTERNAL_SSE2(mm_flags)) {
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
ac->simd_f = ff_int32_to_float_a_sse2;
@@ -105,6 +110,12 @@ MULTI_CAPS_FUNC(SSE2, sse2)
if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLT)
ac->simd_f = ff_unpack_2ch_float_to_int16_a_sse2;
}
+ if(channels == 6) {
+ if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
+ ac->simd_f = ff_pack_6ch_int32_to_float_a_sse2;
+ if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
+ ac->simd_f = ff_pack_6ch_float_to_int32_a_sse2;
+ }
}
if(EXTERNAL_SSSE3(mm_flags)) {
if(channels == 2) {
@@ -116,16 +127,6 @@ MULTI_CAPS_FUNC(SSE2, sse2)
ac->simd_f = ff_unpack_2ch_int16_to_float_a_ssse3;
}
}
- if(EXTERNAL_SSE4(mm_flags)) {
- if(channels == 6) {
- if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
- ac->simd_f = ff_pack_6ch_float_to_float_a_sse4;
- if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P)
- ac->simd_f = ff_pack_6ch_int32_to_float_a_sse4;
- if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP)
- ac->simd_f = ff_pack_6ch_float_to_int32_a_sse4;
- }
- }
if(EXTERNAL_AVX(mm_flags)) {
if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
ac->simd_f = ff_int32_to_float_a_avx;
OpenPOWER on IntegriCloud