summaryrefslogtreecommitdiffstats
path: root/libavcodec/x86
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/Makefile11
-rw-r--r--libavcodec/x86/ac3dsp.asm12
-rw-r--r--libavcodec/x86/ac3dsp_mmx.c8
-rw-r--r--libavcodec/x86/cabac.h4
-rw-r--r--libavcodec/x86/cavsdsp_mmx.c8
-rw-r--r--libavcodec/x86/dct32_sse.asm12
-rw-r--r--libavcodec/x86/deinterlace.asm12
-rw-r--r--libavcodec/x86/diracdsp_mmx.c95
-rw-r--r--libavcodec/x86/diracdsp_mmx.h47
-rw-r--r--libavcodec/x86/diracdsp_yasm.asm260
-rw-r--r--libavcodec/x86/dnxhd_mmx.c8
-rw-r--r--libavcodec/x86/dsputil_mmx.c92
-rw-r--r--libavcodec/x86/dsputil_mmx.h8
-rw-r--r--libavcodec/x86/dsputil_mmx_avg_template.c8
-rw-r--r--libavcodec/x86/dsputil_mmx_qns_template.c8
-rw-r--r--libavcodec/x86/dsputil_mmx_rnd_template.c8
-rw-r--r--libavcodec/x86/dsputil_yasm.asm13
-rw-r--r--libavcodec/x86/dsputilenc_mmx.c8
-rw-r--r--libavcodec/x86/dsputilenc_yasm.asm12
-rw-r--r--libavcodec/x86/dwt.c202
-rw-r--r--libavcodec/x86/dwt.h30
-rw-r--r--libavcodec/x86/dwt_yasm.asm291
-rw-r--r--libavcodec/x86/fdct_mmx.c12
-rw-r--r--libavcodec/x86/fft.c8
-rw-r--r--libavcodec/x86/fft.h8
-rw-r--r--libavcodec/x86/fft_3dn.c8
-rw-r--r--libavcodec/x86/fft_3dn2.c8
-rw-r--r--libavcodec/x86/fft_mmx.asm11
-rw-r--r--libavcodec/x86/fft_sse.c8
-rw-r--r--libavcodec/x86/fmtconvert.asm12
-rw-r--r--libavcodec/x86/fmtconvert_mmx.c8
-rw-r--r--libavcodec/x86/h264_chromamc.asm12
-rw-r--r--libavcodec/x86/h264_deblock.asm24
-rw-r--r--libavcodec/x86/h264_deblock_10bit.asm24
-rw-r--r--libavcodec/x86/h264_i386.h8
-rw-r--r--libavcodec/x86/h264_idct.asm12
-rw-r--r--libavcodec/x86/h264_idct_10bit.asm4
-rw-r--r--libavcodec/x86/h264_intrapred.asm12
-rw-r--r--libavcodec/x86/h264_intrapred_10bit.asm4
-rw-r--r--libavcodec/x86/h264_intrapred_init.c11
-rw-r--r--libavcodec/x86/h264_qpel_mmx.c8
-rw-r--r--libavcodec/x86/h264_weight.asm17
-rw-r--r--libavcodec/x86/h264dsp_mmx.c13
-rw-r--r--libavcodec/x86/idct_mmx_xvid.c8
-rw-r--r--libavcodec/x86/idct_sse2_xvid.c8
-rw-r--r--libavcodec/x86/idct_xvid.h8
-rw-r--r--libavcodec/x86/imdct36_sse.asm4
-rw-r--r--libavcodec/x86/lpc_mmx.c8
-rw-r--r--libavcodec/x86/mathops.h8
-rw-r--r--libavcodec/x86/mlpdsp.c8
-rw-r--r--libavcodec/x86/motion_est_mmx.c8
-rw-r--r--libavcodec/x86/mpegaudiodec_mmx.c19
-rw-r--r--libavcodec/x86/mpegvideo_mmx.c8
-rw-r--r--libavcodec/x86/mpegvideo_mmx_template.c19
-rw-r--r--libavcodec/x86/pngdsp-init.c8
-rw-r--r--libavcodec/x86/pngdsp.asm2
-rw-r--r--libavcodec/x86/proresdsp-init.c5
-rw-r--r--libavcodec/x86/proresdsp.asm148
-rw-r--r--libavcodec/x86/rv34dsp.asm6
-rw-r--r--libavcodec/x86/rv40dsp.asm8
-rw-r--r--libavcodec/x86/simple_idct_mmx.c8
-rw-r--r--libavcodec/x86/snowdsp_mmx.c12
-rw-r--r--libavcodec/x86/v210-init.c48
-rw-r--r--libavcodec/x86/v210.asm89
-rw-r--r--libavcodec/x86/vc1dsp_yasm.asm12
-rw-r--r--libavcodec/x86/vp3dsp.asm12
-rw-r--r--libavcodec/x86/vp56_arith.h8
-rw-r--r--libavcodec/x86/vp56dsp.asm12
-rw-r--r--libavcodec/x86/vp56dsp_init.c8
-rw-r--r--libavcodec/x86/vp8dsp-init.c8
-rw-r--r--libavcodec/x86/vp8dsp.asm12
71 files changed, 1494 insertions, 407 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index fc88433..dc8c66a 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -9,6 +9,8 @@ YASM-OBJS-FFT-$(HAVE_SSE) += x86/fft_sse.o
YASM-OBJS-$(CONFIG_FFT) += x86/fft_mmx.o \
$(YASM-OBJS-FFT-yes)
+YASM-OBJS-$(CONFIG_DWT) += x86/dwt_yasm.o
+
YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \
x86/h264_chromamc_10bit.o
@@ -33,6 +35,8 @@ YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \
YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o
+YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o
+
MMX-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_mmx.o
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o
@@ -43,11 +47,16 @@ MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_yasm.o
MMX-OBJS-$(CONFIG_GPL) += x86/idct_mmx.o
MMX-OBJS-$(CONFIG_LPC) += x86/lpc_mmx.o
+YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
+MMX-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp-init.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
MMX-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp-init.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
MMX-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp-init.o
-MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o
+MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o \
+ x86/dwt.o
+YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
+MMX-OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o
YASM-OBJS-$(CONFIG_VP5_DECODER) += x86/vp3dsp.o
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 746fd83..a7380f9 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -2,25 +2,25 @@
;* x86-optimized AC-3 DSP utils
;* Copyright (c) 2011 Justin Ruggles
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index d6bb469..9578e98 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -2,20 +2,20 @@
* x86-optimized AC-3 DSP utils
* Copyright (c) 2011 Justin Ruggles
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 3c3652d..2bb0be6 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -81,7 +81,9 @@
"add "tmp" , "low" \n\t"\
"1: \n\t"
-#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS)
+
+#if HAVE_7REGS && !defined(BROKEN_RELOCATIONS) && !(defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
+ && !(defined(__i386) && !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)
#define get_cabac_inline get_cabac_inline_x86
static av_always_inline int get_cabac_inline_x86(CABACContext *c,
uint8_t *const state)
diff --git a/libavcodec/x86/cavsdsp_mmx.c b/libavcodec/x86/cavsdsp_mmx.c
index 3bc62ea..0f5fdaa 100644
--- a/libavcodec/x86/cavsdsp_mmx.c
+++ b/libavcodec/x86/cavsdsp_mmx.c
@@ -5,20 +5,20 @@
* MMX-optimized DSP functions, based on H.264 optimizations by
* Michael Niedermayer and Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/dct32_sse.asm b/libavcodec/x86/dct32_sse.asm
index e3c8a45..ca44106 100644
--- a/libavcodec/x86/dct32_sse.asm
+++ b/libavcodec/x86/dct32_sse.asm
@@ -2,25 +2,25 @@
;* 32 point SSE-optimized DCT transform
;* Copyright (c) 2010 Vitor Sessak
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA 32
diff --git a/libavcodec/x86/deinterlace.asm b/libavcodec/x86/deinterlace.asm
index 8613485..a09473b 100644
--- a/libavcodec/x86/deinterlace.asm
+++ b/libavcodec/x86/deinterlace.asm
@@ -3,25 +3,25 @@
;* Copyright (c) 2010 Vitor Sessak
;* Copyright (c) 2002 Michael Niedermayer
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
diff --git a/libavcodec/x86/diracdsp_mmx.c b/libavcodec/x86/diracdsp_mmx.c
new file mode 100644
index 0000000..a343bdd
--- /dev/null
+++ b/libavcodec/x86/diracdsp_mmx.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dsputil_mmx.h"
+#include "diracdsp_mmx.h"
+
+void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+
+#define HPEL_FILTER(MMSIZE, EXT) \
+ void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, uint8_t *, int, int); \
+ void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, uint8_t *, int); \
+ \
+ static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \
+ uint8_t *src, int stride, int width, int height) \
+ { \
+ while( height-- ) \
+ { \
+ ff_dirac_hpel_filter_v_ ## EXT(dstv-MMSIZE, src-MMSIZE, stride, width+MMSIZE+5); \
+ ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); \
+ ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); \
+ \
+ dsth += stride; \
+ dstv += stride; \
+ dstc += stride; \
+ src += stride; \
+ } \
+ }
+
+#if !ARCH_X86_64
+HPEL_FILTER(8, mmx)
+#endif
+HPEL_FILTER(16, sse2)
+
+#define PIXFUNC(PFX, IDX, EXT) \
+ /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_ ## EXT;*/ \
+ c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \
+ c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT
+
+void ff_diracdsp_init_mmx(DiracDSPContext* c)
+{
+ int mm_flags = av_get_cpu_flags();
+
+#if HAVE_YASM
+ c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
+#if !ARCH_X86_64
+ c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
+ c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
+ c->dirac_hpel_filter = dirac_hpel_filter_mmx;
+ c->add_rect_clamped = ff_add_rect_clamped_mmx;
+ c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx;
+#endif
+#endif
+
+ PIXFUNC(put, 0, mmx);
+ PIXFUNC(avg, 0, mmx);
+
+ if (mm_flags & AV_CPU_FLAG_MMX2) {
+ PIXFUNC(avg, 0, mmx2);
+ }
+
+ if (mm_flags & AV_CPU_FLAG_SSE2) {
+#if HAVE_YASM
+ c->dirac_hpel_filter = dirac_hpel_filter_sse2;
+ c->add_rect_clamped = ff_add_rect_clamped_sse2;
+ c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2;
+
+ c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
+ c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
+#endif
+ c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
+ c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
+ c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
+ c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
+ }
+}
diff --git a/libavcodec/x86/diracdsp_mmx.h b/libavcodec/x86/diracdsp_mmx.h
new file mode 100644
index 0000000..3d8e117
--- /dev/null
+++ b/libavcodec/x86/diracdsp_mmx.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_DIRACDSP_H
+#define AVCODEC_X86_DIRACDSP_H
+
+#include "libavcodec/diracdsp.h"
+
+void ff_diracdsp_init_mmx(DiracDSPContext* c);
+
+DECL_DIRAC_PIXOP(put, mmx);
+DECL_DIRAC_PIXOP(avg, mmx);
+DECL_DIRAC_PIXOP(avg, mmx2);
+
+void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+
+void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
+void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
+
+void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+
+void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+
+#endif
diff --git a/libavcodec/x86/diracdsp_yasm.asm b/libavcodec/x86/diracdsp_yasm.asm
new file mode 100644
index 0000000..72f57e6
--- /dev/null
+++ b/libavcodec/x86/diracdsp_yasm.asm
@@ -0,0 +1,260 @@
+;******************************************************************************
+;* Copyright (c) 2010 David Conrad
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+
+SECTION_RODATA
+pw_3: times 8 dw 3
+pw_7: times 8 dw 7
+pw_16: times 8 dw 16
+pw_32: times 8 dw 32
+pb_128: times 16 db 128
+
+section .text
+
+%macro UNPACK_ADD 6
+ mov%5 %1, %3
+ mov%6 m5, %4
+ mova m4, %1
+ mova %2, m5
+ punpcklbw %1, m7
+ punpcklbw m5, m7
+ punpckhbw m4, m7
+ punpckhbw %2, m7
+ paddw %1, m5
+ paddw %2, m4
+%endmacro
+
+%macro HPEL_FILTER 1
+; dirac_hpel_filter_v_sse2(uint8_t *dst, uint8_t *src, int stride, int width);
+cglobal dirac_hpel_filter_v_%1, 4,6,8, dst, src, stride, width, src0, stridex3
+ mov src0q, srcq
+ lea stridex3q, [3*strideq]
+ sub src0q, stridex3q
+ pxor m7, m7
+.loop:
+ ; 7*(src[0] + src[1])
+ UNPACK_ADD m0, m1, [srcq], [srcq + strideq], a,a
+ pmullw m0, [pw_7]
+ pmullw m1, [pw_7]
+
+ ; 3*( ... + src[-2] + src[3])
+ UNPACK_ADD m2, m3, [src0q + strideq], [srcq + stridex3q], a,a
+ paddw m0, m2
+ paddw m1, m3
+ pmullw m0, [pw_3]
+ pmullw m1, [pw_3]
+
+ ; ... - 7*(src[-1] + src[2])
+ UNPACK_ADD m2, m3, [src0q + strideq*2], [srcq + strideq*2], a,a
+ pmullw m2, [pw_7]
+ pmullw m3, [pw_7]
+ psubw m0, m2
+ psubw m1, m3
+
+ ; ... - (src[-3] + src[4])
+ UNPACK_ADD m2, m3, [src0q], [srcq + strideq*4], a,a
+ psubw m0, m2
+ psubw m1, m3
+
+ paddw m0, [pw_16]
+ paddw m1, [pw_16]
+ psraw m0, 5
+ psraw m1, 5
+ packuswb m0, m1
+ mova [dstq], m0
+ add dstq, mmsize
+ add srcq, mmsize
+ add src0q, mmsize
+ sub widthd, mmsize
+ jg .loop
+ RET
+
+; dirac_hpel_filter_h_sse2(uint8_t *dst, uint8_t *src, int width);
+cglobal dirac_hpel_filter_h_%1, 3,3,8, dst, src, width
+ dec widthd
+ pxor m7, m7
+ and widthd, ~(mmsize-1)
+.loop:
+ ; 7*(src[0] + src[1])
+ UNPACK_ADD m0, m1, [srcq + widthq], [srcq + widthq + 1], u,u
+ pmullw m0, [pw_7]
+ pmullw m1, [pw_7]
+
+ ; 3*( ... + src[-2] + src[3])
+ UNPACK_ADD m2, m3, [srcq + widthq - 2], [srcq + widthq + 3], u,u
+ paddw m0, m2
+ paddw m1, m3
+ pmullw m0, [pw_3]
+ pmullw m1, [pw_3]
+
+ ; ... - 7*(src[-1] + src[2])
+ UNPACK_ADD m2, m3, [srcq + widthq - 1], [srcq + widthq + 2], u,u
+ pmullw m2, [pw_7]
+ pmullw m3, [pw_7]
+ psubw m0, m2
+ psubw m1, m3
+
+ ; ... - (src[-3] + src[4])
+ UNPACK_ADD m2, m3, [srcq + widthq - 3], [srcq + widthq + 4], u,u
+ psubw m0, m2
+ psubw m1, m3
+
+ paddw m0, [pw_16]
+ paddw m1, [pw_16]
+ psraw m0, 5
+ psraw m1, 5
+ packuswb m0, m1
+ mova [dstq + widthq], m0
+ sub widthd, mmsize
+ jge .loop
+ RET
+%endmacro
+
+%macro PUT_RECT 1
+; void put_rect_clamped(uint8_t *dst, int dst_stride, int16_t *src, int src_stride, int width, int height)
+cglobal put_signed_rect_clamped_%1, 5,7,3, dst, dst_stride, src, src_stride, w, dst2, src2
+ mova m0, [pb_128]
+ add wd, (mmsize-1)
+ and wd, ~(mmsize-1)
+
+%if ARCH_X86_64
+ mov r10d, r5m
+ mov r11d, wd
+ %define wspill r11d
+ %define hd r10d
+%else
+ mov r4m, wd
+ %define wspill r4m
+ %define hd r5mp
+%endif
+
+.loopy
+ lea src2q, [srcq+src_strideq*2]
+ lea dst2q, [dstq+dst_strideq]
+.loopx:
+ sub wd, mmsize
+ mova m1, [srcq +2*wq]
+ mova m2, [src2q+2*wq]
+ packsswb m1, [srcq +2*wq+mmsize]
+ packsswb m2, [src2q+2*wq+mmsize]
+ paddb m1, m0
+ paddb m2, m0
+ mova [dstq +wq], m1
+ mova [dst2q+wq], m2
+ jg .loopx
+
+ lea srcq, [srcq+src_strideq*4]
+ lea dstq, [dstq+dst_strideq*2]
+ sub hd, 2
+ mov wd, wspill
+ jg .loopy
+ RET
+%endm
+
+%macro ADD_RECT 1
+; void add_rect_clamped(uint8_t *dst, uint16_t *src, int stride, int16_t *idwt, int idwt_stride, int width, int height)
+cglobal add_rect_clamped_%1, 7,7,3, dst, src, stride, idwt, idwt_stride, w, h
+ mova m0, [pw_32]
+ add wd, (mmsize-1)
+ and wd, ~(mmsize-1)
+
+%if ARCH_X86_64
+ mov r11d, wd
+ %define wspill r11d
+%else
+ mov r5m, wd
+ %define wspill r5m
+%endif
+
+.loop:
+ sub wd, mmsize
+ movu m1, [srcq +2*wq] ; FIXME: ensure alignment
+ paddw m1, m0
+ psraw m1, 6
+ movu m2, [srcq +2*wq+mmsize] ; FIXME: ensure alignment
+ paddw m2, m0
+ psraw m2, 6
+ paddw m1, [idwtq+2*wq]
+ paddw m2, [idwtq+2*wq+mmsize]
+ packuswb m1, m2
+ mova [dstq +wq], m1
+ jg .loop
+
+ lea srcq, [srcq + 2*strideq]
+ add dstq, strideq
+ lea idwtq, [idwtq+ 2*idwt_strideq]
+ sub hd, 1
+ mov wd, wspill
+ jg .loop
+ RET
+%endm
+
+%macro ADD_OBMC 2
+; void add_obmc(uint16_t *dst, uint8_t *src, int stride, uint8_t *obmc_weight, int yblen)
+cglobal add_dirac_obmc%1_%2, 6,6,5, dst, src, stride, obmc, yblen
+ pxor m4, m4
+.loop:
+%assign i 0
+%rep %1 / mmsize
+ mova m0, [srcq+i]
+ mova m1, m0
+ punpcklbw m0, m4
+ punpckhbw m1, m4
+ mova m2, [obmcq+i]
+ mova m3, m2
+ punpcklbw m2, m4
+ punpckhbw m3, m4
+ pmullw m0, m2
+ pmullw m1, m3
+ movu m2, [dstq+2*i]
+ movu m3, [dstq+2*i+mmsize]
+ paddw m0, m2
+ paddw m1, m3
+ movu [dstq+2*i], m0
+ movu [dstq+2*i+mmsize], m1
+%assign i i+mmsize
+%endrep
+ lea srcq, [srcq+strideq]
+ lea dstq, [dstq+2*strideq]
+ add obmcq, 32
+ sub yblend, 1
+ jg .loop
+ RET
+%endm
+
+INIT_MMX
+%if ARCH_X86_64 == 0
+PUT_RECT mmx
+ADD_RECT mmx
+
+HPEL_FILTER mmx
+ADD_OBMC 32, mmx
+ADD_OBMC 16, mmx
+%endif
+ADD_OBMC 8, mmx
+
+INIT_XMM
+PUT_RECT sse2
+ADD_RECT sse2
+
+HPEL_FILTER sse2
+ADD_OBMC 32, sse2
+ADD_OBMC 16, sse2
diff --git a/libavcodec/x86/dnxhd_mmx.c b/libavcodec/x86/dnxhd_mmx.c
index e193d62..1f2b035 100644
--- a/libavcodec/x86/dnxhd_mmx.c
+++ b/libavcodec/x86/dnxhd_mmx.c
@@ -4,20 +4,20 @@
*
* VC-3 encoder funded by the British Broadcasting Corporation
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 2a8cad1..8b976f7 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -3,20 +3,20 @@
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
@@ -31,6 +31,7 @@
#include "libavcodec/ac3dec.h"
#include "dsputil_mmx.h"
#include "idct_xvid.h"
+#include "diracdsp_mmx.h"
//#undef NDEBUG
//#include <assert.h>
@@ -1864,6 +1865,84 @@ void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, in
avg_pixels8_mmx2(dst, src, stride, 8);
}
+/* only used in VP3/5/6 */
+static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
+{
+// START_TIMER
+ MOVQ_BFE(mm6);
+ __asm__ volatile(
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "movq (%1,%4), %%mm2 \n\t"
+ "movq (%2,%4), %%mm3 \n\t"
+ PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3) \n\t"
+ "movq %%mm5, (%3,%4) \n\t"
+
+ "movq (%1,%4,2), %%mm0 \n\t"
+ "movq (%2,%4,2), %%mm1 \n\t"
+ "movq (%1,%5), %%mm2 \n\t"
+ "movq (%2,%5), %%mm3 \n\t"
+ "lea (%1,%4,4), %1 \n\t"
+ "lea (%2,%4,4), %2 \n\t"
+ PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3,%4,2) \n\t"
+ "movq %%mm5, (%3,%5) \n\t"
+ "lea (%3,%4,4), %3 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+r"(h), "+r"(a), "+r"(b), "+r"(dst)
+ :"r"((x86_reg)stride), "r"((x86_reg)3L*stride)
+ :"memory");
+// STOP_TIMER("put_vp_no_rnd_pixels8_l2_mmx")
+}
+static void put_vp_no_rnd_pixels16_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
+{
+ put_vp_no_rnd_pixels8_l2_mmx(dst, a, b, stride, h);
+ put_vp_no_rnd_pixels8_l2_mmx(dst+8, a+8, b+8, stride, h);
+}
+
+#if CONFIG_DIRAC_DECODER
+#define DIRAC_PIXOP(OPNAME, EXT)\
+void ff_ ## OPNAME ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+ OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+ OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+ OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\
+ OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\
+}
+
+DIRAC_PIXOP(put, mmx)
+DIRAC_PIXOP(avg, mmx)
+DIRAC_PIXOP(avg, mmx2)
+
+void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ put_pixels16_sse2(dst, src[0], stride, h);
+}
+void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ avg_pixels16_sse2(dst, src[0], stride, h);
+}
+void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ put_pixels16_sse2(dst , src[0] , stride, h);
+ put_pixels16_sse2(dst+16, src[0]+16, stride, h);
+}
+void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ avg_pixels16_sse2(dst , src[0] , stride, h);
+ avg_pixels16_sse2(dst+16, src[0]+16, stride, h);
+}
+#endif
+
/* XXX: those functions should be suppressed ASAP when all IDCTs are
converted */
#if CONFIG_GPL
@@ -2461,7 +2540,10 @@ void ff_dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->add_bytes= add_bytes_mmx;
if (!high_bit_depth)
- c->draw_edges = draw_edges_mmx;
+ c->draw_edges = draw_edges_mmx;
+
+ c->put_no_rnd_pixels_l2[0]= put_vp_no_rnd_pixels16_l2_mmx;
+ c->put_no_rnd_pixels_l2[1]= put_vp_no_rnd_pixels8_l2_mmx;
if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
c->h263_v_loop_filter= h263_v_loop_filter_mmx;
diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h
index 097739c..6ba5ea8 100644
--- a/libavcodec/x86/dsputil_mmx.h
+++ b/libavcodec/x86/dsputil_mmx.h
@@ -2,20 +2,20 @@
* MMX optimized DSP utils
* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/dsputil_mmx_avg_template.c b/libavcodec/x86/dsputil_mmx_avg_template.c
index 8b116b7..6f76859 100644
--- a/libavcodec/x86/dsputil_mmx_avg_template.c
+++ b/libavcodec/x86/dsputil_mmx_avg_template.c
@@ -7,20 +7,20 @@
* mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
* and improved by Zdenek Kabelac <kabi@users.sf.net>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/dsputil_mmx_qns_template.c b/libavcodec/x86/dsputil_mmx_qns_template.c
index 20a40a1..77a41b9 100644
--- a/libavcodec/x86/dsputil_mmx_qns_template.c
+++ b/libavcodec/x86/dsputil_mmx_qns_template.c
@@ -5,20 +5,20 @@
* MMX optimization by Michael Niedermayer <michaelni@gmx.at>
* 3DNow! and SSSE3 optimization by Zuxy Meng <zuxy.meng@gmail.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/dsputil_mmx_rnd_template.c b/libavcodec/x86/dsputil_mmx_rnd_template.c
index 34a2c0b..e4c9138 100644
--- a/libavcodec/x86/dsputil_mmx_rnd_template.c
+++ b/libavcodec/x86/dsputil_mmx_rnd_template.c
@@ -7,20 +7,20 @@
* mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
* and improved by Zdenek Kabelac <kabi@users.sf.net>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 09940d1..8e8c10c 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -2,24 +2,24 @@
;* MMX optimized DSP utils
;* Copyright (c) 2008 Loren Merritt
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
+%include "libavutil/x86/x86inc.asm"
%include "x86util.asm"
SECTION_RODATA
@@ -1177,8 +1177,10 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len
INIT_XMM sse
BUTTERFLIES_FLOAT_INTERLEAVE
+%if HAVE_AVX
INIT_YMM avx
BUTTERFLIES_FLOAT_INTERLEAVE
+%endif
INIT_XMM sse2
; %1 = aligned/unaligned
@@ -1301,3 +1303,4 @@ cglobal bswap32_buf, 3,4,3
mov [r0], r2d
.end:
RET
+
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 2a403ba..c0f9111 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -3,20 +3,20 @@
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
diff --git a/libavcodec/x86/dsputilenc_yasm.asm b/libavcodec/x86/dsputilenc_yasm.asm
index cfd4e6d..1be359d 100644
--- a/libavcodec/x86/dsputilenc_yasm.asm
+++ b/libavcodec/x86/dsputilenc_yasm.asm
@@ -4,25 +4,25 @@
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION .text
diff --git a/libavcodec/x86/dwt.c b/libavcodec/x86/dwt.c
new file mode 100644
index 0000000..45b3b34
--- /dev/null
+++ b/libavcodec/x86/dwt.c
@@ -0,0 +1,202 @@
+/*
+ * MMX optimized discrete wavelet transform
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86_cpu.h"
+#include "dsputil_mmx.h"
+#include "dwt.h"
+
+#define COMPOSE_VERTICAL(ext, align) \
+void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
+void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
+void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
+void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
+void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
+void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
+void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
+\
+static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) \
+ b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
+\
+ ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
+} \
+\
+static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) \
+ b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
+\
+ ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
+} \
+\
+static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
+ IDWTELEM *b3, IDWTELEM *b4, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) \
+ b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
+\
+ ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
+} \
+\
+static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
+ IDWTELEM *b3, IDWTELEM *b4, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) \
+ b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
+\
+ ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
+} \
+static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) { \
+ b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
+ b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \
+ } \
+\
+ ff_vertical_compose_haar##ext(b0, b1, width_align); \
+} \
+static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+{\
+ int w2= w>>1;\
+ int x= w2 - (w2&(align-1));\
+ ff_horizontal_compose_haar0i##ext(b, tmp, w);\
+\
+ for (; x < w2; x++) {\
+ b[2*x ] = tmp[x];\
+ b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\
+ }\
+}\
+static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+{\
+ int w2= w>>1;\
+ int x= w2 - (w2&(align-1));\
+ ff_horizontal_compose_haar1i##ext(b, tmp, w);\
+\
+ for (; x < w2; x++) {\
+ b[2*x ] = (tmp[x] + 1)>>1;\
+ b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;\
+ }\
+}\
+\
+
+#if HAVE_YASM
+#if !ARCH_X86_64
+COMPOSE_VERTICAL(_mmx, 4)
+#endif
+COMPOSE_VERTICAL(_sse2, 8)
+#endif
+
+
+void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);
+
+static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w)
+{
+ int w2= w>>1;
+ int x= w2 - (w2&7);
+ ff_horizontal_compose_dd97i_ssse3(b, tmp, w);
+
+ for (; x < w2; x++) {
+ b[2*x ] = (tmp[x] + 1)>>1;
+ b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
+ }
+}
+
+void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
+{
+#if HAVE_YASM
+ int mm_flags = av_get_cpu_flags();
+
+#if !ARCH_X86_64
+ if (!(mm_flags & AV_CPU_FLAG_MMX))
+ return;
+
+ switch (type) {
+ case DWT_DIRAC_DD9_7:
+ d->vertical_compose_l0 = vertical_compose53iL0_mmx;
+ d->vertical_compose_h0 = vertical_compose_dd97iH0_mmx;
+ break;
+ case DWT_DIRAC_LEGALL5_3:
+ d->vertical_compose_l0 = vertical_compose53iL0_mmx;
+ d->vertical_compose_h0 = vertical_compose_dirac53iH0_mmx;
+ break;
+ case DWT_DIRAC_DD13_7:
+ d->vertical_compose_l0 = vertical_compose_dd137iL0_mmx;
+ d->vertical_compose_h0 = vertical_compose_dd97iH0_mmx;
+ break;
+ case DWT_DIRAC_HAAR0:
+ d->vertical_compose = vertical_compose_haar_mmx;
+ d->horizontal_compose = horizontal_compose_haar0i_mmx;
+ break;
+ case DWT_DIRAC_HAAR1:
+ d->vertical_compose = vertical_compose_haar_mmx;
+ d->horizontal_compose = horizontal_compose_haar1i_mmx;
+ break;
+ }
+#endif
+
+ if (!(mm_flags & AV_CPU_FLAG_SSE2))
+ return;
+
+ switch (type) {
+ case DWT_DIRAC_DD9_7:
+ d->vertical_compose_l0 = vertical_compose53iL0_sse2;
+ d->vertical_compose_h0 = vertical_compose_dd97iH0_sse2;
+ break;
+ case DWT_DIRAC_LEGALL5_3:
+ d->vertical_compose_l0 = vertical_compose53iL0_sse2;
+ d->vertical_compose_h0 = vertical_compose_dirac53iH0_sse2;
+ break;
+ case DWT_DIRAC_DD13_7:
+ d->vertical_compose_l0 = vertical_compose_dd137iL0_sse2;
+ d->vertical_compose_h0 = vertical_compose_dd97iH0_sse2;
+ break;
+ case DWT_DIRAC_HAAR0:
+ d->vertical_compose = vertical_compose_haar_sse2;
+ d->horizontal_compose = horizontal_compose_haar0i_sse2;
+ break;
+ case DWT_DIRAC_HAAR1:
+ d->vertical_compose = vertical_compose_haar_sse2;
+ d->horizontal_compose = horizontal_compose_haar1i_sse2;
+ break;
+ }
+
+ if (!(mm_flags & AV_CPU_FLAG_SSSE3))
+ return;
+
+ switch (type) {
+ case DWT_DIRAC_DD9_7:
+ d->horizontal_compose = horizontal_compose_dd97i_ssse3;
+ break;
+ }
+#endif // HAVE_YASM
+}
diff --git a/libavcodec/x86/dwt.h b/libavcodec/x86/dwt.h
new file mode 100644
index 0000000..199f611
--- /dev/null
+++ b/libavcodec/x86/dwt.h
@@ -0,0 +1,30 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_DWT_H
+#define AVCODEC_X86_DWT_H
+
+#include "libavcodec/dwt.h"
+
+void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+void ff_horizontal_compose_haar1i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+void ff_horizontal_compose_haar0i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+
+void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type);
+
+#endif
diff --git a/libavcodec/x86/dwt_yasm.asm b/libavcodec/x86/dwt_yasm.asm
new file mode 100644
index 0000000..ac6c505
--- /dev/null
+++ b/libavcodec/x86/dwt_yasm.asm
@@ -0,0 +1,291 @@
+;******************************************************************************
+;* MMX optimized discrete wavelet transform
+;* Copyright (c) 2010 David Conrad
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+
+SECTION_RODATA
+pw_1: times 8 dw 1
+pw_2: times 8 dw 2
+pw_8: times 8 dw 8
+pw_16: times 8 dw 16
+pw_1991: times 4 dw 9,-1
+
+section .text
+
+; %1 -= (%2 + %3 + 2)>>2 %4 is pw_2
+%macro COMPOSE_53iL0 4
+ paddw %2, %3
+ paddw %2, %4
+ psraw %2, 2
+ psubw %1, %2
+%endm
+
+; m1 = %1 + (-m0 + 9*m1 + 9*%2 -%3 + 8)>>4
+; if %4 is supplied, %1 is loaded unaligned from there
+; m2: clobbered m3: pw_8 m4: pw_1991
+%macro COMPOSE_DD97iH0 3-4
+ paddw m0, %3
+ paddw m1, %2
+ psubw m0, m3
+ mova m2, m1
+ punpcklwd m1, m0
+ punpckhwd m2, m0
+ pmaddwd m1, m4
+ pmaddwd m2, m4
+%if %0 > 3
+ movu %1, %4
+%endif
+ psrad m1, 4
+ psrad m2, 4
+ packssdw m1, m2
+ paddw m1, %1
+%endm
+
+%macro COMPOSE_VERTICAL 1
+; void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+; int width)
+cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width
+ mova m2, [pw_2]
+.loop:
+ sub widthd, mmsize/2
+ mova m1, [b0q+2*widthq]
+ mova m0, [b1q+2*widthq]
+ COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2
+ mova [b1q+2*widthq], m0
+ jg .loop
+ REP_RET
+
+; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+; int width)
+cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width
+ mova m1, [pw_1]
+.loop:
+ sub widthd, mmsize/2
+ mova m0, [b0q+2*widthq]
+ paddw m0, [b2q+2*widthq]
+ paddw m0, m1
+ psraw m0, 1
+ paddw m0, [b1q+2*widthq]
+ mova [b1q+2*widthq], m0
+ jg .loop
+ REP_RET
+
+; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+; IDWTELEM *b3, IDWTELEM *b4, int width)
+cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width
+ mova m3, [pw_8]
+ mova m4, [pw_1991]
+.loop:
+ sub widthd, mmsize/2
+ mova m0, [b0q+2*widthq]
+ mova m1, [b1q+2*widthq]
+ COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq]
+ mova [b2q+2*widthq], m1
+ jg .loop
+ REP_RET
+
+; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+; IDWTELEM *b3, IDWTELEM *b4, int width)
+cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width
+ mova m3, [pw_16]
+ mova m4, [pw_1991]
+.loop:
+ sub widthd, mmsize/2
+ mova m0, [b0q+2*widthq]
+ mova m1, [b1q+2*widthq]
+ mova m5, [b2q+2*widthq]
+ paddw m0, [b4q+2*widthq]
+ paddw m1, [b3q+2*widthq]
+ psubw m0, m3
+ mova m2, m1
+ punpcklwd m1, m0
+ punpckhwd m2, m0
+ pmaddwd m1, m4
+ pmaddwd m2, m4
+ psrad m1, 5
+ psrad m2, 5
+ packssdw m1, m2
+ psubw m5, m1
+ mova [b2q+2*widthq], m5
+ jg .loop
+ REP_RET
+
+; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width)
+cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
+ mova m3, [pw_1]
+.loop:
+ sub widthd, mmsize/2
+ mova m1, [b1q+2*widthq]
+ mova m0, [b0q+2*widthq]
+ mova m2, m1
+ paddw m1, m3
+ psraw m1, 1
+ psubw m0, m1
+ mova [b0q+2*widthq], m0
+ paddw m2, m0
+ mova [b1q+2*widthq], m2
+ jg .loop
+ REP_RET
+%endmacro
+
+; extend the left and right edges of the tmp array by %1 and %2 respectively
+%macro EDGE_EXTENSION 3
+ mov %3, [tmpq]
+%assign %%i 1
+%rep %1
+ mov [tmpq-2*%%i], %3
+ %assign %%i %%i+1
+%endrep
+ mov %3, [tmpq+2*w2q-2]
+%assign %%i 0
+%rep %2
+ mov [tmpq+2*w2q+2*%%i], %3
+ %assign %%i %%i+1
+%endrep
+%endmacro
+
+
+%macro HAAR_HORIZONTAL 2
+; void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *tmp, int width)
+cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
+ mov w2d, wd
+ xor xq, xq
+ shr w2d, 1
+ lea b_w2q, [bq+wq]
+ mova m3, [pw_1]
+.lowpass_loop:
+ movu m1, [b_w2q + 2*xq]
+ mova m0, [bq + 2*xq]
+ paddw m1, m3
+ psraw m1, 1
+ psubw m0, m1
+ mova [tmpq + 2*xq], m0
+ add xq, mmsize/2
+ cmp xq, w2q
+ jl .lowpass_loop
+
+ xor xq, xq
+ and w2q, ~(mmsize/2 - 1)
+ cmp w2q, mmsize/2
+ jl .end
+
+.highpass_loop:
+ movu m1, [b_w2q + 2*xq]
+ mova m0, [tmpq + 2*xq]
+ paddw m1, m0
+
+ ; shift and interleave
+%if %2 == 1
+ paddw m0, m3
+ paddw m1, m3
+ psraw m0, 1
+ psraw m1, 1
+%endif
+ mova m2, m0
+ punpcklwd m0, m1
+ punpckhwd m2, m1
+ mova [bq+4*xq], m0
+ mova [bq+4*xq+mmsize], m2
+
+ add xq, mmsize/2
+ cmp xq, w2q
+ jl .highpass_loop
+.end:
+ REP_RET
+%endmacro
+
+
+INIT_XMM
+; void horizontal_compose_dd97i(IDWTELEM *b, IDWTELEM *tmp, int width)
+cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2
+ mov w2d, wd
+ xor xd, xd
+ shr w2d, 1
+ lea b_w2q, [bq+wq]
+ movu m4, [bq+wq]
+ mova m7, [pw_2]
+ pslldq m4, 14
+.lowpass_loop:
+ movu m1, [b_w2q + 2*xq]
+ mova m0, [bq + 2*xq]
+ mova m2, m1
+ palignr m1, m4, 14
+ mova m4, m2
+ COMPOSE_53iL0 m0, m1, m2, m7
+ mova [tmpq + 2*xq], m0
+ add xd, mmsize/2
+ cmp xd, w2d
+ jl .lowpass_loop
+
+ EDGE_EXTENSION 1, 2, xw
+ ; leave the last up to 7 (sse) or 3 (mmx) values for C
+ xor xd, xd
+ and w2d, ~(mmsize/2 - 1)
+ cmp w2d, mmsize/2
+ jl .end
+
+ mova m7, [tmpq-mmsize]
+ mova m0, [tmpq]
+ mova m5, [pw_1]
+ mova m3, [pw_8]
+ mova m4, [pw_1991]
+.highpass_loop:
+ mova m6, m0
+ palignr m0, m7, 14
+ mova m7, [tmpq + 2*xq + 16]
+ mova m1, m7
+ mova m2, m7
+ palignr m1, m6, 2
+ palignr m2, m6, 4
+ COMPOSE_DD97iH0 m0, m6, m2, [b_w2q + 2*xq]
+ mova m0, m7
+ mova m7, m6
+
+ ; shift and interleave
+ paddw m6, m5
+ paddw m1, m5
+ psraw m6, 1
+ psraw m1, 1
+ mova m2, m6
+ punpcklwd m6, m1
+ punpckhwd m2, m1
+ mova [bq+4*xq], m6
+ mova [bq+4*xq+mmsize], m2
+
+ add xd, mmsize/2
+ cmp xd, w2d
+ jl .highpass_loop
+.end:
+ REP_RET
+
+
+%if ARCH_X86_64 == 0
+INIT_MMX
+COMPOSE_VERTICAL mmx
+HAAR_HORIZONTAL mmx, 0
+HAAR_HORIZONTAL mmx, 1
+%endif
+
+;;INIT_XMM
+INIT_XMM
+COMPOSE_VERTICAL sse2
+HAAR_HORIZONTAL sse2, 0
+HAAR_HORIZONTAL sse2, 1
diff --git a/libavcodec/x86/fdct_mmx.c b/libavcodec/x86/fdct_mmx.c
index cc3036b..3662242 100644
--- a/libavcodec/x86/fdct_mmx.c
+++ b/libavcodec/x86/fdct_mmx.c
@@ -13,20 +13,20 @@
* a page about fdct at http://www.geocities.com/ssavekar/dct.htm
* Skal's fdct at http://skal.planet-d.net/coding/dct.html
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -68,7 +68,7 @@ DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW };
-static struct
+static const struct
{
DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
} fdct_r_row_sse2 =
@@ -151,7 +151,7 @@ DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = { // forward_dct
29692, -12299, 26722, -31521,
};
-static struct
+static const struct
{
DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
} tab_frw_01234567_sse2 =
diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
index 3e0c42f..8544e32 100644
--- a/libavcodec/x86/fft.c
+++ b/libavcodec/x86/fft.c
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h
index 9d68d5b..7fdc858 100644
--- a/libavcodec/x86/fft.h
+++ b/libavcodec/x86/fft.h
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/fft_3dn.c b/libavcodec/x86/fft_3dn.c
index 5a4d3ad..6f2e2e8 100644
--- a/libavcodec/x86/fft_3dn.c
+++ b/libavcodec/x86/fft_3dn.c
@@ -2,20 +2,20 @@
* FFT/MDCT transform with 3DNow! optimizations
* Copyright (c) 2008 Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/fft_3dn2.c b/libavcodec/x86/fft_3dn2.c
index ce3c9da..f3c5dd0 100644
--- a/libavcodec/x86/fft_3dn2.c
+++ b/libavcodec/x86/fft_3dn2.c
@@ -2,20 +2,20 @@
* FFT/MDCT transform with Extended 3DNow! optimizations
* Copyright (c) 2006-2008 Zuxy MENG Jie, Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index a2f26cc..bea31fe 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -6,20 +6,20 @@
;* This algorithm (though not any of the implementation details) is
;* based on libdjbfft by D. J. Bernstein.
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -28,7 +28,7 @@
; in blocks as convenient to the vector size.
; i.e. {4x real, 4x imaginary, 4x real, ...} (or 2x respectively)
-%include "x86inc.asm"
+%include "libavutil/x86/x86inc.asm"
%if ARCH_X86_64
%define pointer resq
@@ -388,6 +388,7 @@ fft32_interleave_avx:
sub r2d, mmsize/4
jg .deint_loop
ret
+
%endif
INIT_XMM
diff --git a/libavcodec/x86/fft_sse.c b/libavcodec/x86/fft_sse.c
index 13b992f..a09ad38 100644
--- a/libavcodec/x86/fft_sse.c
+++ b/libavcodec/x86/fft_sse.c
@@ -2,20 +2,20 @@
* FFT/MDCT transform with SSE optimizations
* Copyright (c) 2008 Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 3f39c7e..ca39aa3 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -2,25 +2,25 @@
;* x86 optimized Format Conversion Utils
;* Copyright (c) 2008 Loren Merritt
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_TEXT
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index 42cb0bc..ca0b293 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -3,20 +3,20 @@
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index 8b621fa..fc615c7 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -3,25 +3,25 @@
;* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
;* 2005-2008 Loren Merritt
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index f264edb..08625e4 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -7,25 +7,25 @@
;* Jason Garrett-Glaser <darkshikari@gmail.com>
;* Oskar Arvidsson <oskar@irock.se>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION .text
@@ -386,8 +386,10 @@ cglobal deblock_h_luma_8_%1, 5,7
INIT_XMM
DEBLOCK_LUMA sse2
+%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA avx
+%endif
%else
@@ -505,8 +507,10 @@ INIT_MMX
DEBLOCK_LUMA mmxext, v8, 8
INIT_XMM
DEBLOCK_LUMA sse2, v, 16
+%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA avx, v, 16
+%endif
%endif ; ARCH
@@ -777,8 +781,10 @@ cglobal deblock_h_luma_intra_8_%1, 2,4
INIT_XMM
DEBLOCK_LUMA_INTRA sse2, v
+%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA_INTRA avx , v
+%endif
%if ARCH_X86_64 == 0
INIT_MMX
DEBLOCK_LUMA_INTRA mmxext, v8
@@ -835,7 +841,11 @@ cglobal deblock_h_chroma_8_mmxext, 5,7
TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
movq buf0, m0
movq buf1, m3
- call ff_chroma_inter_body_mmxext
+ LOAD_MASK r2d, r3d
+ movd m6, [r4] ; tc0
+ punpcklbw m6, m6
+ pand m7, m6
+ DEBLOCK_P0_Q0
movq m0, buf0
movq m3, buf1
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index a4ccafc..562d81c 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -24,8 +24,8 @@
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
@@ -165,7 +165,7 @@ cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16)
SUB rsp, pad
shl r2d, 2
shl r3d, 2
- LOAD_AB m4, m5, r2, r3
+ LOAD_AB m4, m5, r2d, r3d
mov r3, 32/mmsize
mov r2, r0
sub r0, r1
@@ -222,7 +222,7 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16)
SUB rsp, pad
shl r2d, 2
shl r3d, 2
- LOAD_AB m4, m5, r2, r3
+ LOAD_AB m4, m5, r2d, r3d
mov r3, r1
mova am, m4
add r3, r1
@@ -352,7 +352,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15
%define mask2 m11
shl r2d, 2
shl r3d, 2
- LOAD_AB m12, m13, r2, r3
+ LOAD_AB m12, m13, r2d, r3d
mov r2, r0
sub r0, r1
sub r0, r1
@@ -380,7 +380,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15
cglobal deblock_h_luma_10_%1, 5,7,15
shl r2d, 2
shl r3d, 2
- LOAD_AB m12, m13, r2, r3
+ LOAD_AB m12, m13, r2d, r3d
mov r2, r1
add r2, r1
add r2, r1
@@ -419,9 +419,11 @@ cglobal deblock_h_luma_10_%1, 5,7,15
INIT_XMM
DEBLOCK_LUMA_64 sse2
+%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA_64 avx
%endif
+%endif
%macro SWAPMOVA 2
%ifid %1
@@ -714,8 +716,10 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,16
INIT_XMM
DEBLOCK_LUMA_INTRA_64 sse2
+%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA_INTRA_64 avx
+%endif
%endif
@@ -799,10 +803,12 @@ DEBLOCK_LUMA_INTRA mmxext
INIT_XMM
DEBLOCK_LUMA sse2
DEBLOCK_LUMA_INTRA sse2
+%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA avx
DEBLOCK_LUMA_INTRA avx
%endif
+%endif
; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
@@ -858,7 +864,7 @@ cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16)
.loop:
%endif
CHROMA_V_LOAD r5
- LOAD_AB m4, m5, r2, r3
+ LOAD_AB m4, m5, r2d, r3d
LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4
pxor m4, m4
CHROMA_V_LOAD_TC m6, r4
@@ -892,7 +898,7 @@ cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16)
.loop:
%endif
CHROMA_V_LOAD r4
- LOAD_AB m4, m5, r2, r3
+ LOAD_AB m4, m5, r2d, r3d
LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4
CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6
CHROMA_V_STORE
@@ -913,5 +919,7 @@ DEBLOCK_CHROMA mmxext
%endif
INIT_XMM
DEBLOCK_CHROMA sse2
+%if HAVE_AVX
INIT_AVX
DEBLOCK_CHROMA avx
+%endif
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index e195e04..510f726 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -2,20 +2,20 @@
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 5e8c0ed..25f4755 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -9,25 +9,25 @@
;* Holger Lubitz <hal@duncan.ol.sub.de>
;* Min Chen <chenm001.163.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 501c2a4..27a18f4 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -22,8 +22,8 @@
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index c6b4386..f97d051 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -5,25 +5,25 @@
;* Copyright (c) 2010 Loren Merritt
;* Copyright (c) 2010 Ronald S. Bultje
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 1423b56..79fa23e 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -22,8 +22,8 @@
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 41e611e..58740e2 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2010 Jason Garrett-Glaser
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -188,7 +188,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
if (chroma_format_idc == 1)
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx;
if (codec_id == CODEC_ID_SVQ3) {
- h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx;
+ if (mm_flags & AV_CPU_FLAG_CMOV)
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx;
} else if (codec_id == CODEC_ID_RV40) {
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx;
} else {
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index b7a4183..807d854 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -2,20 +2,20 @@
* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
* Copyright (c) 2011 Daniel Kang
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm
index 1c40e49..b2fb663 100644
--- a/libavcodec/x86/h264_weight.asm
+++ b/libavcodec/x86/h264_weight.asm
@@ -4,24 +4,24 @@
;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
;* Copyright (C) 2010 Eli Friedman <eli.friedman@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
+%include "libavutil/x86/x86inc.asm"
SECTION .text
@@ -253,6 +253,13 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
add off_regd, 1
or off_regd, 1
add r4, 1
+ cmp r5, 128
+ jne .normal
+ sar r5, 1
+ sar r6, 1
+ sar off_regd, 1
+ sub r4, 1
+.normal
movd m4, r5d
movd m0, r6d
movd m5, off_regd
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index dcd9180..063e3de 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -361,7 +361,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
if (chroma_format_idc == 1)
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
- c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;
+ if (mm_flags & AV_CPU_FLAG_CMOV)
+ c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;
if (mm_flags & AV_CPU_FLAG_MMX2) {
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2;
@@ -419,7 +420,7 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16_ssse3;
c->biweight_h264_pixels_tab[1]= ff_h264_biweight_8_ssse3;
}
- if (mm_flags&AV_CPU_FLAG_AVX) {
+ if (HAVE_AVX && mm_flags&AV_CPU_FLAG_AVX) {
#if HAVE_ALIGNED_STACK
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
diff --git a/libavcodec/x86/idct_mmx_xvid.c b/libavcodec/x86/idct_mmx_xvid.c
index 139798e..1b48ab5 100644
--- a/libavcodec/x86/idct_mmx_xvid.c
+++ b/libavcodec/x86/idct_mmx_xvid.c
@@ -22,20 +22,20 @@
*
* conversion to gcc syntax by Michael Niedermayer
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
- * along with Libav; if not, write to the Free Software Foundation,
+ * along with FFmpeg; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
index 968b400..fc75a57 100644
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -9,7 +9,7 @@
*
* Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid.
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
* Vertical pass is an implementation of the scheme:
* Loeffler C., Ligtenberg A., and Moschytz C.S.:
@@ -23,18 +23,18 @@
*
* More details at http://skal.planet-d.net/coding/dct.html
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
- * along with Libav; if not, write to the Free Software Foundation,
+ * along with FFmpeg; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/idct_xvid.h b/libavcodec/x86/idct_xvid.h
index 495d2ca..be91d1c 100644
--- a/libavcodec/x86/idct_xvid.h
+++ b/libavcodec/x86/idct_xvid.h
@@ -1,20 +1,20 @@
/*
* XVID MPEG-4 VIDEO CODEC
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/imdct36_sse.asm b/libavcodec/x86/imdct36_sse.asm
index 937a2cc..336e9f0 100644
--- a/libavcodec/x86/imdct36_sse.asm
+++ b/libavcodec/x86/imdct36_sse.asm
@@ -371,8 +371,10 @@ DEFINE_IMDCT
INIT_XMM ssse3
DEFINE_IMDCT
+%if HAVE_AVX
INIT_XMM avx
DEFINE_IMDCT
+%endif
INIT_XMM sse
@@ -717,5 +719,7 @@ cglobal four_imdct36_float, 5,5,16, out, buf, in, win, tmp
INIT_XMM sse
DEFINE_FOUR_IMDCT
+%if HAVE_AVX
INIT_XMM avx
DEFINE_FOUR_IMDCT
+%endif
diff --git a/libavcodec/x86/lpc_mmx.c b/libavcodec/x86/lpc_mmx.c
index d41c19b..1c202e2 100644
--- a/libavcodec/x86/lpc_mmx.c
+++ b/libavcodec/x86/lpc_mmx.c
@@ -2,20 +2,20 @@
* MMX optimized LPC DSP utils
* Copyright (c) 2007 Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
index 50b0283..33d9a6c 100644
--- a/libavcodec/x86/mathops.h
+++ b/libavcodec/x86/mathops.h
@@ -2,20 +2,20 @@
* simple math operations
* Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/mlpdsp.c b/libavcodec/x86/mlpdsp.c
index 400855d..7ea77fc 100644
--- a/libavcodec/x86/mlpdsp.c
+++ b/libavcodec/x86/mlpdsp.c
@@ -2,20 +2,20 @@
* MLP DSP functions x86-optimized
* Copyright (c) 2009 Ramiro Polla
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/motion_est_mmx.c b/libavcodec/x86/motion_est_mmx.c
index a522a5e..33bb020 100644
--- a/libavcodec/x86/motion_est_mmx.c
+++ b/libavcodec/x86/motion_est_mmx.c
@@ -5,20 +5,20 @@
*
* mostly by Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/mpegaudiodec_mmx.c b/libavcodec/x86/mpegaudiodec_mmx.c
index f51a06d..939b441 100644
--- a/libavcodec/x86/mpegaudiodec_mmx.c
+++ b/libavcodec/x86/mpegaudiodec_mmx.c
@@ -2,20 +2,20 @@
* MMX optimized MP3 decoding functions
* Copyright (c) 2010 Vitor Sessak
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -211,11 +211,17 @@ static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \
} \
}
+#if HAVE_YASM
+#if HAVE_SSE
DECL_IMDCT_BLOCKS(sse,sse)
DECL_IMDCT_BLOCKS(sse2,sse)
DECL_IMDCT_BLOCKS(sse3,sse)
DECL_IMDCT_BLOCKS(ssse3,sse)
+#endif
+#if HAVE_AVX
DECL_IMDCT_BLOCKS(avx,avx)
+#endif
+#endif
void ff_mpadsp_init_mmx(MPADSPContext *s)
{
@@ -239,8 +245,11 @@ void ff_mpadsp_init_mmx(MPADSPContext *s)
s->apply_window_float = apply_window_mp3;
}
#if HAVE_YASM
- if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
+ if (0) {
+#if HAVE_AVX
+ } else if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
s->imdct36_blocks_float = imdct36_blocks_avx;
+#endif
#if HAVE_SSE
} else if (mm_flags & AV_CPU_FLAG_SSSE3) {
s->imdct36_blocks_float = imdct36_blocks_ssse3;
diff --git a/libavcodec/x86/mpegvideo_mmx.c b/libavcodec/x86/mpegvideo_mmx.c
index dcce486..6739505 100644
--- a/libavcodec/x86/mpegvideo_mmx.c
+++ b/libavcodec/x86/mpegvideo_mmx.c
@@ -5,20 +5,20 @@
* Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru>
* h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/mpegvideo_mmx_template.c b/libavcodec/x86/mpegvideo_mmx_template.c
index 13653c8..9119476 100644
--- a/libavcodec/x86/mpegvideo_mmx_template.c
+++ b/libavcodec/x86/mpegvideo_mmx_template.c
@@ -3,20 +3,20 @@
*
* Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -110,10 +110,15 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if (s->mb_intra) {
int dummy;
- if (n < 4)
+ if (n < 4){
q = s->y_dc_scale;
- else
+ bias = s->q_intra_matrix16[qscale][1];
+ qmat = s->q_intra_matrix16[qscale][0];
+ }else{
q = s->c_dc_scale;
+ bias = s->q_chroma_intra_matrix16[qscale][1];
+ qmat = s->q_chroma_intra_matrix16[qscale][0];
+ }
/* note: block[0] is assumed to be positive */
if (!s->h263_aic) {
__asm__ volatile (
@@ -128,8 +133,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
block[0]=0; //avoid fake overflow
// temp_block[0] = (block[0] + (q >> 1)) / q;
last_non_zero_p1 = 1;
- bias = s->q_intra_matrix16[qscale][1];
- qmat = s->q_intra_matrix16[qscale][0];
} else {
last_non_zero_p1 = 0;
bias = s->q_inter_matrix16[qscale][1];
diff --git a/libavcodec/x86/pngdsp-init.c b/libavcodec/x86/pngdsp-init.c
index 136e92e..f122b24 100644
--- a/libavcodec/x86/pngdsp-init.c
+++ b/libavcodec/x86/pngdsp-init.c
@@ -2,20 +2,20 @@
* x86 PNG optimizations.
* Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index d6e6374..8999c17 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -28,7 +28,7 @@ SECTION_RODATA
cextern pw_255
-section .text align=16
+SECTION_TEXT 16
; %1 = nr. of xmm registers used
%macro ADD_BYTES_FN 1
diff --git a/libavcodec/x86/proresdsp-init.c b/libavcodec/x86/proresdsp-init.c
index f202f9f..c4aeb7f 100644
--- a/libavcodec/x86/proresdsp-init.c
+++ b/libavcodec/x86/proresdsp-init.c
@@ -29,11 +29,14 @@ void ff_prores_idct_put_10_sse4(uint16_t *dst, int linesize,
void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
DCTELEM *block, const int16_t *qmat);
-void ff_proresdsp_x86_init(ProresDSPContext *dsp)
+void ff_proresdsp_x86_init(ProresDSPContext *dsp, AVCodecContext *avctx)
{
#if ARCH_X86_64 && HAVE_YASM
int flags = av_get_cpu_flags();
+ if(avctx->flags & CODEC_FLAG_BITEXACT)
+ return;
+
if (flags & AV_CPU_FLAG_SSE2) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_sse2;
diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm
index 9b2e11e..a09d871 100644
--- a/libavcodec/x86/proresdsp.asm
+++ b/libavcodec/x86/proresdsp.asm
@@ -1,23 +1,24 @@
;******************************************************************************
;* x86-SIMD-optimized IDCT for prores
-;* this is identical to "simple" IDCT except for the clip range
+;* this is identical to "simple" IDCT written by Michael Niedermayer
+;* except for the clip range
;*
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -48,10 +49,10 @@ w1_plus_w5: times 4 dw W1sh2, +W5sh2
w5_min_w1: times 4 dw W5sh2, -W1sh2
w5_plus_w7: times 4 dw W5sh2, +W7sh2
w7_min_w5: times 4 dw W7sh2, -W5sh2
-row_round: times 8 dw (1<<14)
+pw_88: times 8 dw 0x2008
+cextern pw_1
cextern pw_4
-cextern pw_8
cextern pw_512
cextern pw_1019
@@ -93,14 +94,12 @@ section .text align=16
; a2 -= W6 * row[2];
; a3 -= W2 * row[2];
%ifidn %1, col
- paddw m10,[pw_8]
+ paddw m10,[pw_88]
%endif
- SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[0], row[2] }[0-3]/[4-7]
%ifidn %1, row
- psubw m10,[row_round]
+ paddw m10,[pw_1]
%endif
- SIGNEXTEND m8, m9, m14 ; { row[2] }[0-3] / [4-7]
- SIGNEXTEND m10, m11, m14 ; { row[0] }[0-3] / [4-7]
+ SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[0], row[2] }[0-3]/[4-7]
pmaddwd m2, m0, [w4_plus_w6]
pmaddwd m3, m1, [w4_plus_w6]
pmaddwd m4, m0, [w4_min_w6]
@@ -109,75 +108,33 @@ section .text align=16
pmaddwd m7, m1, [w4_min_w2]
pmaddwd m0, [w4_plus_w2]
pmaddwd m1, [w4_plus_w2]
- pslld m2, 2
- pslld m3, 2
- pslld m4, 2
- pslld m5, 2
- pslld m6, 2
- pslld m7, 2
- pslld m0, 2
- pslld m1, 2
; a0: -1*row[0]-1*row[2]
; a1: -1*row[0]
; a2: -1*row[0]
; a3: -1*row[0]+1*row[2]
- psubd m2, m10 ; a1[0-3]
- psubd m3, m11 ; a1[4-7]
- psubd m4, m10 ; a2[0-3]
- psubd m5, m11 ; a2[4-7]
- psubd m0, m10
- psubd m1, m11
- psubd m6, m10
- psubd m7, m11
- psubd m0, m8 ; a0[0-3]
- psubd m1, m9 ; a0[4-7]
- paddd m6, m8 ; a3[0-3]
- paddd m7, m9 ; a3[4-7]
; a0 += W4*row[4] + W6*row[6]; i.e. -1*row[4]
; a1 -= W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6]
; a2 -= W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6]
; a3 += W4*row[4] - W6*row[6]; i.e. -1*row[4]
SBUTTERFLY3 wd, 8, 9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7]
- SIGNEXTEND m13, m14, m10 ; { row[4] }[0-3] / [4-7]
pmaddwd m10, m8, [w4_plus_w6]
pmaddwd m11, m9, [w4_plus_w6]
- pslld m10, 2
- pslld m11, 2
- psubd m10, m13
- psubd m11, m14
paddd m0, m10 ; a0[0-3]
paddd m1, m11 ; a0[4-7]
pmaddwd m10, m8, [w4_min_w6]
pmaddwd m11, m9, [w4_min_w6]
- pslld m10, 2
- pslld m11, 2
- psubd m10, m13
- psubd m11, m14
paddd m6, m10 ; a3[0-3]
paddd m7, m11 ; a3[4-7]
pmaddwd m10, m8, [w4_min_w2]
pmaddwd m11, m9, [w4_min_w2]
pmaddwd m8, [w4_plus_w2]
pmaddwd m9, [w4_plus_w2]
- pslld m10, 2
- pslld m11, 2
- pslld m8, 2
- pslld m9, 2
- psubd m10, m13
- psubd m11, m14
- psubd m8, m13
- psubd m9, m14
psubd m4, m10 ; a2[0-3] intermediate
psubd m5, m11 ; a2[4-7] intermediate
psubd m2, m8 ; a1[0-3] intermediate
psubd m3, m9 ; a1[4-7] intermediate
- SIGNEXTEND m12, m13, m10 ; { row[6] }[0-3] / [4-7]
- psubd m4, m12 ; a2[0-3]
- psubd m5, m13 ; a2[4-7]
- paddd m2, m12 ; a1[0-3]
- paddd m3, m13 ; a1[4-7]
; load/store
mova [r2+ 0], m0
@@ -208,8 +165,6 @@ section .text align=16
; b3 = MUL(W7, row[1]);
; MAC(b3, -W5, row[3]);
SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[1], row[3] }[0-3]/[4-7]
- SIGNEXTEND m10, m11, m12 ; { row[1] }[0-3] / [4-7]
- SIGNEXTEND m8, m9, m12 ; { row[3] }[0-3] / [4-7]
pmaddwd m2, m0, [w3_min_w7]
pmaddwd m3, m1, [w3_min_w7]
pmaddwd m4, m0, [w5_min_w1]
@@ -218,35 +173,11 @@ section .text align=16
pmaddwd m7, m1, [w7_min_w5]
pmaddwd m0, [w1_plus_w3]
pmaddwd m1, [w1_plus_w3]
- pslld m2, 2
- pslld m3, 2
- pslld m4, 2
- pslld m5, 2
- pslld m6, 2
- pslld m7, 2
- pslld m0, 2
- pslld m1, 2
; b0: +1*row[1]+2*row[3]
; b1: +2*row[1]-1*row[3]
; b2: -1*row[1]-1*row[3]
; b3: +1*row[1]+1*row[3]
- psubd m2, m8
- psubd m3, m9
- paddd m0, m8
- paddd m1, m9
- paddd m8, m10 ; { row[1] + row[3] }[0-3]
- paddd m9, m11 ; { row[1] + row[3] }[4-7]
- paddd m10, m10
- paddd m11, m11
- paddd m0, m8 ; b0[0-3]
- paddd m1, m9 ; b0[4-7]
- paddd m2, m10 ; b1[0-3]
- paddd m3, m11 ; b2[4-7]
- psubd m4, m8 ; b2[0-3]
- psubd m5, m9 ; b2[4-7]
- paddd m6, m8 ; b3[0-3]
- paddd m7, m9 ; b3[4-7]
; MAC(b0, W5, row[5]);
; MAC(b0, W7, row[7]);
@@ -257,38 +188,16 @@ section .text align=16
; MAC(b3, W3, row[5]);
; MAC(b3, -W1, row[7]);
SBUTTERFLY3 wd, 8, 9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7]
- SIGNEXTEND m13, m12, m11 ; { row[5] }[0-3] / [4-7]
- SIGNEXTEND m14, m11, m10 ; { row[7] }[0-3] / [4-7]
; b0: -1*row[5]+1*row[7]
; b1: -1*row[5]+1*row[7]
; b2: +1*row[5]+2*row[7]
; b3: +2*row[5]-1*row[7]
- paddd m4, m13
- paddd m5, m12
- paddd m6, m13
- paddd m7, m12
- psubd m13, m14 ; { row[5] - row[7] }[0-3]
- psubd m12, m11 ; { row[5] - row[7] }[4-7]
- paddd m14, m14
- paddd m11, m11
- psubd m0, m13
- psubd m1, m12
- psubd m2, m13
- psubd m3, m12
- paddd m4, m14
- paddd m5, m11
- paddd m6, m13
- paddd m7, m12
pmaddwd m10, m8, [w1_plus_w5]
pmaddwd m11, m9, [w1_plus_w5]
pmaddwd m12, m8, [w5_plus_w7]
pmaddwd m13, m9, [w5_plus_w7]
- pslld m10, 2
- pslld m11, 2
- pslld m12, 2
- pslld m13, 2
psubd m2, m10 ; b1[0-3]
psubd m3, m11 ; b1[4-7]
paddd m0, m12 ; b0[0-3]
@@ -297,10 +206,6 @@ section .text align=16
pmaddwd m13, m9, [w7_plus_w3]
pmaddwd m8, [w3_min_w1]
pmaddwd m9, [w3_min_w1]
- pslld m12, 2
- pslld m13, 2
- pslld m8, 2
- pslld m9, 2
paddd m4, m12 ; b2[0-3]
paddd m5, m13 ; b2[4-7]
paddd m6, m8 ; b3[0-3]
@@ -347,7 +252,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
pmullw m13,[r3+64]
pmullw m12,[r3+96]
- IDCT_1D row, 17, %1
+ IDCT_1D row, 15, %1
; transpose for second part of IDCT
TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
@@ -362,20 +267,11 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
; for (i = 0; i < 8; i++)
; idctSparseColAdd(dest + i, line_size, block + i);
- IDCT_1D col, 20, %1
+ IDCT_1D col, 18, %1
; clip/store
- mova m6, [pw_512]
mova m3, [pw_4]
mova m5, [pw_1019]
- paddw m8, m6
- paddw m0, m6
- paddw m1, m6
- paddw m2, m6
- paddw m4, m6
- paddw m11, m6
- paddw m9, m6
- paddw m10, m6
pmaxsw m8, m3
pmaxsw m0, m3
pmaxsw m1, m3
@@ -406,27 +302,13 @@ cglobal prores_idct_put_10_%1, 4, 4, %2
RET
%endmacro
-%macro signextend_sse2 3 ; dstlow, dsthigh, tmp
- pxor %3, %3
- pcmpgtw %3, %1
- mova %2, %1
- punpcklwd %1, %3
- punpckhwd %2, %3
-%endmacro
-
-%macro signextend_sse4 2-3 ; dstlow, dsthigh
- movhlps %2, %1
- pmovsxwd %1, %1
- pmovsxwd %2, %2
-%endmacro
-
INIT_XMM
-%define SIGNEXTEND signextend_sse2
idct_put_fn sse2, 16
INIT_XMM
-%define SIGNEXTEND signextend_sse4
idct_put_fn sse4, 16
+%if HAVE_AVX
INIT_AVX
idct_put_fn avx, 16
+%endif
%endif
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index 2d2f6e1..1e152b7 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -39,7 +39,7 @@ SECTION .text
cglobal rv34_idct_%1_mmx2, 1, 2, 0
movsx r1, word [r0]
IDCT_DC r1
- movd m0, r1
+ movd m0, r1d
pshufw m0, m0, 0
movq [r0+ 0], m0
movq [r0+ 8], m0
@@ -59,7 +59,7 @@ cglobal rv34_idct_dc_add_mmx, 3, 3
; calculate DC
IDCT_DC_ROUND r2
pxor m1, m1
- movd m0, r2
+ movd m0, r2d
psubw m1, m0
packuswb m0, m0
packuswb m1, m1
@@ -96,7 +96,7 @@ cglobal rv34_idct_dc_add_sse4, 3, 3, 6
pxor m1, m1
; calculate DC
- movd m0, r2
+ movd m0, r2d
lea r2, [r0+r1*2]
movd m2, [r0]
movd m3, [r0+r1]
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index bff3e7b..c13e9f0 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -164,8 +164,8 @@ cglobal rv40_weight_func_%1, 6, 7, %2
; Use result of test now
jz .loop_512
- movd m2, r3
- movd m3, r4
+ movd m2, r3d
+ movd m3, r4d
SPLATW m2, m2
SPLATW m3, m3
@@ -178,8 +178,8 @@ cglobal rv40_weight_func_%1, 6, 7, %2
.loop_512:
sar r3, 9
sar r4, 9
- movd m2, r3
- movd m3, r4
+ movd m2, r3d
+ movd m3, r4d
%if cpuflag(ssse3)
punpcklbw m3, m2
SPLATW m3, m3
diff --git a/libavcodec/x86/simple_idct_mmx.c b/libavcodec/x86/simple_idct_mmx.c
index dc285cf..db479ce 100644
--- a/libavcodec/x86/simple_idct_mmx.c
+++ b/libavcodec/x86/simple_idct_mmx.c
@@ -3,20 +3,20 @@
*
* Copyright (c) 2001, 2002 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavcodec/dsputil.h"
diff --git a/libavcodec/x86/snowdsp_mmx.c b/libavcodec/x86/snowdsp_mmx.c
index 3e6bc99..f107d55 100644
--- a/libavcodec/x86/snowdsp_mmx.c
+++ b/libavcodec/x86/snowdsp_mmx.c
@@ -2,20 +2,20 @@
* MMX and SSE2 optimized snow DSP utils
* Copyright (c) 2005-2006 Robert Edele <yartrebo@earthlink.net>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -675,14 +675,14 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
#define snow_inner_add_yblock_sse2_end_8\
"sal $1, %%"REG_c" \n\t"\
- "add $"PTR_SIZE"*2, %1 \n\t"\
+ "add"OPSIZE" $"PTR_SIZE"*2, %1 \n\t"\
snow_inner_add_yblock_sse2_end_common1\
"sar $1, %%"REG_c" \n\t"\
"sub $2, %2 \n\t"\
snow_inner_add_yblock_sse2_end_common2
#define snow_inner_add_yblock_sse2_end_16\
- "add $"PTR_SIZE"*1, %1 \n\t"\
+ "add"OPSIZE" $"PTR_SIZE"*1, %1 \n\t"\
snow_inner_add_yblock_sse2_end_common1\
"dec %2 \n\t"\
snow_inner_add_yblock_sse2_end_common2
diff --git a/libavcodec/x86/v210-init.c b/libavcodec/x86/v210-init.c
new file mode 100644
index 0000000..425c628
--- /dev/null
+++ b/libavcodec/x86/v210-init.c
@@ -0,0 +1,48 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavcodec/v210dec.h"
+
+extern void ff_v210_planar_unpack_unaligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+extern void ff_v210_planar_unpack_unaligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+
+extern void ff_v210_planar_unpack_aligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+extern void ff_v210_planar_unpack_aligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+
+av_cold void v210_x86_init(V210DecContext *s)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+#if HAVE_YASM
+ if (s->aligned_input) {
+ if (cpu_flags & AV_CPU_FLAG_SSSE3)
+ s->unpack_frame = ff_v210_planar_unpack_aligned_ssse3;
+
+ if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX)
+ s->unpack_frame = ff_v210_planar_unpack_aligned_avx;
+ }
+ else {
+ if (cpu_flags & AV_CPU_FLAG_SSSE3)
+ s->unpack_frame = ff_v210_planar_unpack_unaligned_ssse3;
+
+ if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX)
+ s->unpack_frame = ff_v210_planar_unpack_unaligned_avx;
+ }
+#endif
+}
diff --git a/libavcodec/x86/v210.asm b/libavcodec/x86/v210.asm
new file mode 100644
index 0000000..f39419e
--- /dev/null
+++ b/libavcodec/x86/v210.asm
@@ -0,0 +1,89 @@
+;******************************************************************************
+;* V210 SIMD unpack
+;* Copyright (c) 2011 Loren Merritt <lorenm@u.washington.edu>
+;* Copyright (c) 2011 Kieran Kunhya <kieran@kunhya.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+v210_mask: times 4 dd 0x3ff
+v210_mult: dw 64,4,64,4,64,4,64,4
+v210_luma_shuf: db 8,9,0,1,2,3,12,13,4,5,6,7,-1,-1,-1,-1
+v210_chroma_shuf: db 0,1,8,9,6,7,-1,-1,2,3,4,5,12,13,-1,-1
+
+SECTION .text
+
+%macro v210_planar_unpack 2
+
+; v210_planar_unpack(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
+cglobal v210_planar_unpack_%1_%2, 5, 5
+ movsxdifnidn r4, r4d
+ lea r1, [r1+2*r4]
+ add r2, r4
+ add r3, r4
+ neg r4
+
+ mova m3, [v210_mult]
+ mova m4, [v210_mask]
+ mova m5, [v210_luma_shuf]
+ mova m6, [v210_chroma_shuf]
+.loop
+%ifidn %1, unaligned
+ movu m0, [r0]
+%else
+ mova m0, [r0]
+%endif
+
+ pmullw m1, m0, m3
+ psrld m0, 10
+ psrlw m1, 6 ; u0 v0 y1 y2 v1 u2 y4 y5
+ pand m0, m4 ; y0 __ u1 __ y3 __ v2 __
+
+ shufps m2, m1, m0, 0x8d ; y1 y2 y4 y5 y0 __ y3 __
+ pshufb m2, m5 ; y0 y1 y2 y3 y4 y5 __ __
+ movu [r1+2*r4], m2
+
+ shufps m1, m0, 0xd8 ; u0 v0 v1 u2 u1 __ v2 __
+ pshufb m1, m6 ; u0 u1 u2 __ v0 v1 v2 __
+ movq [r2+r4], m1
+ movhps [r3+r4], m1
+
+ add r0, mmsize
+ add r4, 6
+ jl .loop
+
+ REP_RET
+%endmacro
+
+INIT_XMM
+v210_planar_unpack unaligned, ssse3
+%if HAVE_AVX
+INIT_AVX
+v210_planar_unpack unaligned, avx
+%endif
+
+INIT_XMM
+v210_planar_unpack aligned, ssse3
+%if HAVE_AVX
+INIT_AVX
+v210_planar_unpack aligned, avx
+%endif
diff --git a/libavcodec/x86/vc1dsp_yasm.asm b/libavcodec/x86/vc1dsp_yasm.asm
index 66f61db..1eba3c1 100644
--- a/libavcodec/x86/vc1dsp_yasm.asm
+++ b/libavcodec/x86/vc1dsp_yasm.asm
@@ -2,25 +2,25 @@
;* VC1 deblocking optimizations
;* Copyright (c) 2009 David Conrad
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
cextern pw_4
cextern pw_5
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index 791cc8e..99621fb 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -2,25 +2,25 @@
;* MMX/SSE2-optimized functions for the VP3 decoder
;* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
; MMX-optimized functions cribbed from the original VP3 source code.
diff --git a/libavcodec/x86/vp56_arith.h b/libavcodec/x86/vp56_arith.h
index be2dd30..ddbf38b 100644
--- a/libavcodec/x86/vp56_arith.h
+++ b/libavcodec/x86/vp56_arith.h
@@ -4,20 +4,20 @@
* Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
* Copyright (C) 2010 Eli Friedman
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/vp56dsp.asm b/libavcodec/x86/vp56dsp.asm
index 66a97f1..2d409bf 100644
--- a/libavcodec/x86/vp56dsp.asm
+++ b/libavcodec/x86/vp56dsp.asm
@@ -3,25 +3,25 @@
;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
cextern pw_64
diff --git a/libavcodec/x86/vp56dsp_init.c b/libavcodec/x86/vp56dsp_init.c
index 29892812..87fc935 100644
--- a/libavcodec/x86/vp56dsp_init.c
+++ b/libavcodec/x86/vp56dsp_init.c
@@ -3,20 +3,20 @@
* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c
index f5e89fa..a75fdf5 100644
--- a/libavcodec/x86/vp8dsp-init.c
+++ b/libavcodec/x86/vp8dsp-init.c
@@ -3,20 +3,20 @@
* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 7d9ebc9..833c88a 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -3,25 +3,25 @@
;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
;* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
-%include "x86inc.asm"
-%include "x86util.asm"
+%include "libavutil/x86/x86inc.asm"
+%include "libavutil/x86/x86util.asm"
SECTION_RODATA
OpenPOWER on IntegriCloud