diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2017-04-04 12:42:58 -0400 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2017-04-06 10:03:28 -0400 |
commit | e0c205677f6b3b7dba6891724cb68bfb81e9b8d6 (patch) | |
tree | 18c3c474de266d4ac7e81339f8fa1b52a3e9950e | |
parent | 2f0591cfa3b773d7a2fec72b30ec25d4ffb0cb32 (diff) | |
download | ffmpeg-streaming-e0c205677f6b3b7dba6891724cb68bfb81e9b8d6.zip ffmpeg-streaming-e0c205677f6b3b7dba6891724cb68bfb81e9b8d6.tar.gz |
x86/simple_idct: add explicit sse2 simple_idct_put/add versions.
These use the mmx IDCT, but sse2 put/add_pixels_clamped implementations.
This way we don't need to use the ff_put/add_pixels_clamped function
pointers.
-rw-r--r-- | libavcodec/x86/idctdsp_init.c | 38 | ||||
-rw-r--r-- | libavcodec/x86/simple_idct.c | 15 | ||||
-rw-r--r-- | libavcodec/x86/simple_idct.h | 3 |
3 files changed, 42 insertions, 14 deletions
diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index bcf7e5b..3f078e8 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -63,27 +63,41 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (INLINE_MMX(cpu_flags)) {
-        if (!high_bit_depth &&
-            avctx->lowres == 0 &&
-            (avctx->idct_algo == FF_IDCT_AUTO ||
-             avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
-             avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
-            c->idct_put = ff_simple_idct_put_mmx;
-            c->idct_add = ff_simple_idct_add_mmx;
-            c->idct = ff_simple_idct_mmx;
-            c->perm_type = FF_IDCT_PERM_SIMPLE;
-        }
-    }
     if (EXTERNAL_MMX(cpu_flags)) {
         c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
         c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
         c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
+
+        if (INLINE_MMX(cpu_flags)) {
+            if (!high_bit_depth &&
+                avctx->lowres == 0 &&
+                (avctx->idct_algo == FF_IDCT_AUTO ||
+                 avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+                 avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+                c->idct_put = ff_simple_idct_put_mmx;
+                c->idct_add = ff_simple_idct_add_mmx;
+                c->idct = ff_simple_idct_mmx;
+                c->perm_type = FF_IDCT_PERM_SIMPLE;
+            }
+        }
     }
+
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
         c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
         c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
+
+        if (INLINE_SSE2(cpu_flags)) {
+            if (!high_bit_depth &&
+                avctx->lowres == 0 &&
+                (avctx->idct_algo == FF_IDCT_AUTO ||
+                 avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+                 avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+                c->idct_put = ff_simple_idct_put_sse2;
+                c->idct_add = ff_simple_idct_add_sse2;
+                c->perm_type = FF_IDCT_PERM_SIMPLE;
+            }
+        }
     }
 
     if (ARCH_X86_64 && avctx->lowres == 0) {
diff --git a/libavcodec/x86/simple_idct.c b/libavcodec/x86/simple_idct.c
index d3a19fa..1155920 100644
--- a/libavcodec/x86/simple_idct.c
+++ b/libavcodec/x86/simple_idct.c
@@ -24,6 +24,7 @@
 #include "libavutil/x86/asm.h"
 
 #include "libavcodec/idctdsp.h"
+#include "libavcodec/x86/idctdsp.h"
 
 #include "idctdsp.h"
 #include "simple_idct.h"
@@ -907,12 +908,22 @@ void ff_simple_idct_mmx(int16_t *block)
 void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
 {
     idct(block);
-    ff_put_pixels_clamped(block, dest, line_size);
+    ff_put_pixels_clamped_mmx(block, dest, line_size);
 }
 void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
 {
     idct(block);
-    ff_add_pixels_clamped(block, dest, line_size);
+    ff_add_pixels_clamped_mmx(block, dest, line_size);
+}
+void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    idct(block);
+    ff_put_pixels_clamped_sse2(block, dest, line_size);
+}
+void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+    idct(block);
+    ff_add_pixels_clamped_sse2(block, dest, line_size);
 }
 
 #endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/simple_idct.h b/libavcodec/x86/simple_idct.h
index ad76baf..d17ef6a 100644
--- a/libavcodec/x86/simple_idct.h
+++ b/libavcodec/x86/simple_idct.h
@@ -26,6 +26,9 @@ void ff_simple_idct_mmx(int16_t *block);
 void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
 void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
 
+void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+
 void ff_simple_idct10_sse2(int16_t *block);
 void ff_simple_idct10_avx(int16_t *block);
 