summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com>, 2017-04-04 12:42:58 -0400
committer: Ronald S. Bultje <rsbultje@gmail.com>, 2017-04-06 10:03:28 -0400
commit: e0c205677f6b3b7dba6891724cb68bfb81e9b8d6 (patch)
tree: 18c3c474de266d4ac7e81339f8fa1b52a3e9950e
parent: 2f0591cfa3b773d7a2fec72b30ec25d4ffb0cb32 (diff)
download: ffmpeg-streaming-e0c205677f6b3b7dba6891724cb68bfb81e9b8d6.zip
          ffmpeg-streaming-e0c205677f6b3b7dba6891724cb68bfb81e9b8d6.tar.gz
x86/simple_idct: add explicit sse2 simple_idct_put/add versions.
These use the mmx IDCT, but sse2 put/add_pixels_clamped implementations. This way we don't need to use the ff_put/add_pixels_clamped function pointers.
-rw-r--r--  libavcodec/x86/idctdsp_init.c  38
-rw-r--r--  libavcodec/x86/simple_idct.c   15
-rw-r--r--  libavcodec/x86/simple_idct.h    3
3 files changed, 42 insertions, 14 deletions
diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index bcf7e5b..3f078e8 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -63,27 +63,41 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
{
int cpu_flags = av_get_cpu_flags();
- if (INLINE_MMX(cpu_flags)) {
- if (!high_bit_depth &&
- avctx->lowres == 0 &&
- (avctx->idct_algo == FF_IDCT_AUTO ||
- avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
- avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
- c->idct_put = ff_simple_idct_put_mmx;
- c->idct_add = ff_simple_idct_add_mmx;
- c->idct = ff_simple_idct_mmx;
- c->perm_type = FF_IDCT_PERM_SIMPLE;
- }
- }
if (EXTERNAL_MMX(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
+
+ if (INLINE_MMX(cpu_flags)) {
+ if (!high_bit_depth &&
+ avctx->lowres == 0 &&
+ (avctx->idct_algo == FF_IDCT_AUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+ c->idct_put = ff_simple_idct_put_mmx;
+ c->idct_add = ff_simple_idct_add_mmx;
+ c->idct = ff_simple_idct_mmx;
+ c->perm_type = FF_IDCT_PERM_SIMPLE;
+ }
+ }
}
+
if (EXTERNAL_SSE2(cpu_flags)) {
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
c->put_pixels_clamped = ff_put_pixels_clamped_sse2;
c->add_pixels_clamped = ff_add_pixels_clamped_sse2;
+
+ if (INLINE_SSE2(cpu_flags)) {
+ if (!high_bit_depth &&
+ avctx->lowres == 0 &&
+ (avctx->idct_algo == FF_IDCT_AUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+ c->idct_put = ff_simple_idct_put_sse2;
+ c->idct_add = ff_simple_idct_add_sse2;
+ c->perm_type = FF_IDCT_PERM_SIMPLE;
+ }
+ }
}
if (ARCH_X86_64 && avctx->lowres == 0) {
diff --git a/libavcodec/x86/simple_idct.c b/libavcodec/x86/simple_idct.c
index d3a19fa..1155920 100644
--- a/libavcodec/x86/simple_idct.c
+++ b/libavcodec/x86/simple_idct.c
@@ -24,6 +24,7 @@
#include "libavutil/x86/asm.h"
#include "libavcodec/idctdsp.h"
+#include "libavcodec/x86/idctdsp.h"
#include "idctdsp.h"
#include "simple_idct.h"
@@ -907,12 +908,22 @@ void ff_simple_idct_mmx(int16_t *block)
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
idct(block);
- ff_put_pixels_clamped(block, dest, line_size);
+ ff_put_pixels_clamped_mmx(block, dest, line_size);
}
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
idct(block);
- ff_add_pixels_clamped(block, dest, line_size);
+ ff_add_pixels_clamped_mmx(block, dest, line_size);
+}
+void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+ idct(block);
+ ff_put_pixels_clamped_sse2(block, dest, line_size);
+}
+void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
+{
+ idct(block);
+ ff_add_pixels_clamped_sse2(block, dest, line_size);
}
#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/simple_idct.h b/libavcodec/x86/simple_idct.h
index ad76baf..d17ef6a 100644
--- a/libavcodec/x86/simple_idct.h
+++ b/libavcodec/x86/simple_idct.h
@@ -26,6 +26,9 @@ void ff_simple_idct_mmx(int16_t *block);
void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
+
void ff_simple_idct10_sse2(int16_t *block);
void ff_simple_idct10_avx(int16_t *block);
OpenPOWER on IntegriCloud