summaryrefslogtreecommitdiffstats
path: root/libavfilter/libmpcodecs
diff options
context:
space:
mode:
Diffstat (limited to 'libavfilter/libmpcodecs')
-rw-r--r--libavfilter/libmpcodecs/av_helpers.h27
-rw-r--r--libavfilter/libmpcodecs/cpudetect.h60
-rw-r--r--libavfilter/libmpcodecs/img_format.c244
-rw-r--r--libavfilter/libmpcodecs/img_format.h309
-rw-r--r--libavfilter/libmpcodecs/libvo/fastmemcpy.h99
-rw-r--r--libavfilter/libmpcodecs/libvo/video_out.h300
-rw-r--r--libavfilter/libmpcodecs/mp_image.c257
-rw-r--r--libavfilter/libmpcodecs/mp_image.h159
-rw-r--r--libavfilter/libmpcodecs/mp_msg.h166
-rw-r--r--libavfilter/libmpcodecs/mpc_info.h43
-rw-r--r--libavfilter/libmpcodecs/vf.h169
-rw-r--r--libavfilter/libmpcodecs/vf_eq.c240
-rw-r--r--libavfilter/libmpcodecs/vf_eq2.c519
-rw-r--r--libavfilter/libmpcodecs/vf_fspp.c2124
-rw-r--r--libavfilter/libmpcodecs/vf_ilpack.c458
-rw-r--r--libavfilter/libmpcodecs/vf_pp7.c491
-rw-r--r--libavfilter/libmpcodecs/vf_softpulldown.c163
-rw-r--r--libavfilter/libmpcodecs/vf_uspp.c394
-rw-r--r--libavfilter/libmpcodecs/vfcap.h56
19 files changed, 6278 insertions, 0 deletions
diff --git a/libavfilter/libmpcodecs/av_helpers.h b/libavfilter/libmpcodecs/av_helpers.h
new file mode 100644
index 0000000..90b67d5
--- /dev/null
+++ b/libavfilter/libmpcodecs/av_helpers.h
@@ -0,0 +1,27 @@
+/*
+ * Generic libav* helpers
+ *
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_AV_HELPERS_H
+#define MPLAYER_AV_HELPERS_H
+
+void ff_init_avcodec(void);
+void ff_init_avformat(void);
+
+#endif /* MPLAYER_AV_HELPERS_H */
diff --git a/libavfilter/libmpcodecs/cpudetect.h b/libavfilter/libmpcodecs/cpudetect.h
new file mode 100644
index 0000000..710f6e6
--- /dev/null
+++ b/libavfilter/libmpcodecs/cpudetect.h
@@ -0,0 +1,60 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_CPUDETECT_H
+#define MPLAYER_CPUDETECT_H
+
+#define CPUTYPE_I386 3
+#define CPUTYPE_I486 4
+#define CPUTYPE_I586 5
+#define CPUTYPE_I686 6
+
+#include "libavutil/x86_cpu.h"
+
+typedef struct cpucaps_s { /* CPU description; presumably filled in by ff_GetCpuCaps() -- TODO confirm */
+ int cpuType; /* NOTE(review): looks like one of the CPUTYPE_* values above -- confirm */
+ int cpuModel;
+ int cpuStepping;
+ int hasMMX; /* has* fields: presumably nonzero when the feature is available -- TODO confirm */
+ int hasMMX2;
+ int has3DNow;
+ int has3DNowExt;
+ int hasSSE;
+ int hasSSE2;
+ int hasSSE3;
+ int hasSSSE3;
+ int hasSSE4;
+ int hasSSE42;
+ int hasSSE4a;
+ int hasAVX;
+ int isX86;
+ unsigned cl_size; /* size of cache line */
+ int hasAltiVec;
+ int hasTSC;
+} CpuCaps;
+
+extern CpuCaps ff_gCpuCaps;
+
+void ff_do_cpuid(unsigned int ax, unsigned int *p);
+
+void ff_GetCpuCaps(CpuCaps *caps);
+
+/* returned value is malloc()'ed so free() it after use */
+char *ff_GetCpuFriendlyName(unsigned int regs[], unsigned int regs2[]);
+
+#endif /* MPLAYER_CPUDETECT_H */
diff --git a/libavfilter/libmpcodecs/img_format.c b/libavfilter/libmpcodecs/img_format.c
new file mode 100644
index 0000000..dd07f00
--- /dev/null
+++ b/libavfilter/libmpcodecs/img_format.c
@@ -0,0 +1,244 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+#include "img_format.h"
+#include "stdio.h"
+#include "libavutil/bswap.h"
+
+// Return a printable name for an IMGFMT_* id. An unknown id is formatted into a
+// static buffer: that result is overwritten by the next unknown-id call.
+const char *ff_vo_format_name(int format)
+{
+    static char unknown_format[20];
+    switch(format) {
+    case IMGFMT_RGB1: return "RGB 1-bit";
+    case IMGFMT_RGB4: return "RGB 4-bit";
+    case IMGFMT_RG4B: return "RGB 4-bit per byte";
+    case IMGFMT_RGB8: return "RGB 8-bit";
+    case IMGFMT_RGB12: return "RGB 12-bit";
+    case IMGFMT_RGB15: return "RGB 15-bit";
+    case IMGFMT_RGB16: return "RGB 16-bit";
+    case IMGFMT_RGB24: return "RGB 24-bit";
+    // no IMGFMT_RGB32/BGR32 cases: each equals one of the ABGR/BGRA/ARGB/RGBA ids below
+    case IMGFMT_RGB48LE: return "RGB 48-bit LE";
+    case IMGFMT_RGB48BE: return "RGB 48-bit BE";
+    case IMGFMT_RGB64LE: return "RGB 64-bit LE";
+    case IMGFMT_RGB64BE: return "RGB 64-bit BE";
+    case IMGFMT_BGR1: return "BGR 1-bit";
+    case IMGFMT_BGR4: return "BGR 4-bit";
+    case IMGFMT_BG4B: return "BGR 4-bit per byte";
+    case IMGFMT_BGR8: return "BGR 8-bit";
+    case IMGFMT_BGR12: return "BGR 12-bit";
+    case IMGFMT_BGR15: return "BGR 15-bit";
+    case IMGFMT_BGR16: return "BGR 16-bit";
+    case IMGFMT_BGR24: return "BGR 24-bit";
+    case IMGFMT_ABGR: return "ABGR";
+    case IMGFMT_BGRA: return "BGRA";
+    case IMGFMT_ARGB: return "ARGB";
+    case IMGFMT_RGBA: return "RGBA";
+    case IMGFMT_XYZ12LE: return "XYZ 36-bit LE";
+    case IMGFMT_XYZ12BE: return "XYZ 36-bit BE";
+    case IMGFMT_GBR24P: return "Planar GBR 24-bit";
+    case IMGFMT_GBR12P: return "Planar GBR 36-bit";
+    case IMGFMT_GBR14P: return "Planar GBR 42-bit";
+    case IMGFMT_YVU9: return "Planar YVU9";
+    case IMGFMT_IF09: return "Planar IF09";
+    case IMGFMT_YV12: return "Planar YV12";
+    case IMGFMT_I420: return "Planar I420";
+    case IMGFMT_IYUV: return "Planar IYUV";
+    case IMGFMT_CLPL: return "Planar CLPL";
+    case IMGFMT_Y800: return "Planar Y800";
+    case IMGFMT_Y8: return "Planar Y8";
+    case IMGFMT_Y8A: return "Planar Y8 with alpha";
+    case IMGFMT_Y16_LE: return "Planar Y16 little-endian";
+    case IMGFMT_Y16_BE: return "Planar Y16 big-endian";
+    case IMGFMT_420P16_LE: return "Planar 420P 16-bit little-endian";
+    case IMGFMT_420P16_BE: return "Planar 420P 16-bit big-endian";
+    case IMGFMT_420P14_LE: return "Planar 420P 14-bit little-endian";
+    case IMGFMT_420P14_BE: return "Planar 420P 14-bit big-endian";
+    case IMGFMT_420P12_LE: return "Planar 420P 12-bit little-endian";
+    case IMGFMT_420P12_BE: return "Planar 420P 12-bit big-endian";
+    case IMGFMT_420P10_LE: return "Planar 420P 10-bit little-endian";
+    case IMGFMT_420P10_BE: return "Planar 420P 10-bit big-endian";
+    case IMGFMT_420P9_LE: return "Planar 420P 9-bit little-endian";
+    case IMGFMT_420P9_BE: return "Planar 420P 9-bit big-endian";
+    case IMGFMT_422P16_LE: return "Planar 422P 16-bit little-endian";
+    case IMGFMT_422P16_BE: return "Planar 422P 16-bit big-endian";
+    case IMGFMT_422P14_LE: return "Planar 422P 14-bit little-endian";
+    case IMGFMT_422P14_BE: return "Planar 422P 14-bit big-endian";
+    case IMGFMT_422P12_LE: return "Planar 422P 12-bit little-endian";
+    case IMGFMT_422P12_BE: return "Planar 422P 12-bit big-endian";
+    case IMGFMT_422P10_LE: return "Planar 422P 10-bit little-endian";
+    case IMGFMT_422P10_BE: return "Planar 422P 10-bit big-endian";
+    case IMGFMT_422P9_LE: return "Planar 422P 9-bit little-endian";
+    case IMGFMT_422P9_BE: return "Planar 422P 9-bit big-endian";
+    case IMGFMT_444P16_LE: return "Planar 444P 16-bit little-endian";
+    case IMGFMT_444P16_BE: return "Planar 444P 16-bit big-endian";
+    case IMGFMT_444P14_LE: return "Planar 444P 14-bit little-endian";
+    case IMGFMT_444P14_BE: return "Planar 444P 14-bit big-endian";
+    case IMGFMT_444P12_LE: return "Planar 444P 12-bit little-endian";
+    case IMGFMT_444P12_BE: return "Planar 444P 12-bit big-endian";
+    case IMGFMT_444P10_LE: return "Planar 444P 10-bit little-endian";
+    case IMGFMT_444P10_BE: return "Planar 444P 10-bit big-endian";
+    case IMGFMT_444P9_LE: return "Planar 444P 9-bit little-endian";
+    case IMGFMT_444P9_BE: return "Planar 444P 9-bit big-endian";
+    case IMGFMT_420A: return "Planar 420P with alpha";
+    case IMGFMT_444P: return "Planar 444P";
+    case IMGFMT_444A: return "Planar 444P with alpha";
+    case IMGFMT_422P: return "Planar 422P";
+    case IMGFMT_422A: return "Planar 422P with alpha";
+    case IMGFMT_411P: return "Planar 411P";
+    case IMGFMT_440P: return "Planar 440P";
+    case IMGFMT_NV12: return "Planar NV12";
+    case IMGFMT_NV21: return "Planar NV21";
+    case IMGFMT_HM12: return "Planar NV12 Macroblock";
+    case IMGFMT_IUYV: return "Packed IUYV";
+    case IMGFMT_IY41: return "Packed IY41";
+    case IMGFMT_IYU1: return "Packed IYU1";
+    case IMGFMT_IYU2: return "Packed IYU2";
+    case IMGFMT_UYVY: return "Packed UYVY";
+    case IMGFMT_UYNV: return "Packed UYNV";
+    case IMGFMT_cyuv: return "Packed CYUV";
+    case IMGFMT_Y422: return "Packed Y422";
+    case IMGFMT_YUY2: return "Packed YUY2";
+    case IMGFMT_YUNV: return "Packed YUNV";
+    case IMGFMT_YVYU: return "Packed YVYU";
+    case IMGFMT_Y41P: return "Packed Y41P";
+    case IMGFMT_Y211: return "Packed Y211";
+    case IMGFMT_Y41T: return "Packed Y41T";
+    case IMGFMT_Y42T: return "Packed Y42T";
+    case IMGFMT_V422: return "Packed V422";
+    case IMGFMT_V655: return "Packed V655";
+    case IMGFMT_CLJR: return "Packed CLJR";
+    case IMGFMT_YUVP: return "Packed YUVP";
+    case IMGFMT_UYVP: return "Packed UYVP";
+    case IMGFMT_MPEGPES: return "Mpeg PES";
+    case IMGFMT_ZRMJPEGNI: return "Zoran MJPEG non-interlaced";
+    case IMGFMT_ZRMJPEGIT: return "Zoran MJPEG top field first";
+    case IMGFMT_ZRMJPEGIB: return "Zoran MJPEG bottom field first";
+    case IMGFMT_XVMC_MOCO_MPEG2: return "MPEG1/2 Motion Compensation";
+    case IMGFMT_XVMC_IDCT_MPEG2: return "MPEG1/2 Motion Compensation and IDCT";
+    case IMGFMT_VDPAU_MPEG1: return "MPEG1 VDPAU acceleration";
+    case IMGFMT_VDPAU_MPEG2: return "MPEG2 VDPAU acceleration";
+    case IMGFMT_VDPAU_H264: return "H.264 VDPAU acceleration";
+    case IMGFMT_VDPAU_MPEG4: return "MPEG-4 Part 2 VDPAU acceleration";
+    case IMGFMT_VDPAU_WMV3: return "WMV3 VDPAU acceleration";
+    case IMGFMT_VDPAU_VC1: return "VC1 VDPAU acceleration";
+    }
+    snprintf(unknown_format, sizeof(unknown_format), "Unknown 0x%04x", (unsigned)format); // %x takes unsigned
+    return unknown_format;
+}
+
+/*
+ * Report the chroma subsampling shifts and the bits per component of a planar
+ * YUV format. x_shift, y_shift and component_bits may each be NULL; they are
+ * written even on failure. Returns bits per pixel, or 0 if unsupported.
+ */
+int ff_mp_get_chroma_shift(int format, int *x_shift, int *y_shift, int *component_bits)
+{
+    int xs = 0, ys = 0;
+    int bpp;
+    int err = 0;
+    int bits = 8;
+    if ((format & 0xff0000f0) == 0x34000050) // big-endian id: byte-swapped LE id
+        format = av_bswap32(format);
+    if ((format & 0xf00000ff) == 0x50000034) {
+        switch (format >> 24) { // the top byte encodes the component depth
+        case 0x50:
+            break;
+        case 0x51:
+            bits = 16;
+            break;
+        case 0x52:
+            bits = 10;
+            break;
+        case 0x53:
+            bits = 9;
+            break;
+        case 0x54: // IMGFMT_4xxP14_*
+            bits = 14;
+            break;
+        case 0x55: // IMGFMT_4xxP12_*
+            bits = 12;
+            break;
+        default:
+            err = 1;
+            break;
+        }
+        switch (format & 0x00ffffff) {
+        case 0x00343434: // 444
+            xs = 0; ys = 0;
+            break;
+        case 0x00323234: // 422
+            xs = 1; ys = 0;
+            break;
+        case 0x00303234: // 420
+            xs = 1; ys = 1;
+            break;
+        case 0x00313134: // 411
+            xs = 2; ys = 0;
+            break;
+        case 0x00303434: // 440
+            xs = 0; ys = 1;
+            break;
+        default:
+            err = 1;
+            break;
+        }
+    } else switch (format) {
+    case IMGFMT_444A:
+        xs = 0; ys = 0;
+        break;
+    case IMGFMT_422A:
+        xs = 1; ys = 0;
+        break;
+    case IMGFMT_420A:
+    case IMGFMT_I420:
+    case IMGFMT_IYUV:
+    case IMGFMT_YV12:
+        xs = 1; ys = 1;
+        break;
+    case IMGFMT_IF09:
+    case IMGFMT_YVU9:
+        xs = 2; ys = 2;
+        break;
+    case IMGFMT_Y8:
+    case IMGFMT_Y800:
+        xs = 31; ys = 31; // makes the (16 >> xs) >> ys chroma term below zero
+        break;
+    case IMGFMT_NV12:
+    case IMGFMT_NV21:
+        xs = 1; ys = 1;
+        // TODO: allowing this though currently breaks
+        // things all over the place.
+        err = 1;
+        break;
+    default:
+        err = 1;
+        break;
+    }
+    if (x_shift) *x_shift = xs;
+    if (y_shift) *y_shift = ys;
+    if (component_bits) *component_bits = bits;
+    bpp = 8 + ((16 >> xs) >> ys); // 8 plus the subsampled share of 16 chroma bits
+    if (format == IMGFMT_420A || format == IMGFMT_422A || format == IMGFMT_444A)
+        bpp += 8;
+    bpp *= (bits + 7) >> 3;
+    return err ? 0 : bpp;
+}
diff --git a/libavfilter/libmpcodecs/img_format.h b/libavfilter/libmpcodecs/img_format.h
new file mode 100644
index 0000000..b5c0b90
--- /dev/null
+++ b/libavfilter/libmpcodecs/img_format.h
@@ -0,0 +1,309 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_IMG_FORMAT_H
+#define MPLAYER_IMG_FORMAT_H
+
+#include "config.h"
+
+/* RGB/BGR Formats */
+
+#define IMGFMT_RGB_MASK 0xFFFFFF00
+#define IMGFMT_RGB (('R'<<24)|('G'<<16)|('B'<<8))
+#define IMGFMT_RGB1 (IMGFMT_RGB|1)
+#define IMGFMT_RGB4 (IMGFMT_RGB|4)
+#define IMGFMT_RGB4_CHAR (IMGFMT_RGB|4|128) // RGB4 with 1 pixel per byte
+#define IMGFMT_RGB8 (IMGFMT_RGB|8)
+#define IMGFMT_RGB12 (IMGFMT_RGB|12)
+#define IMGFMT_RGB15 (IMGFMT_RGB|15)
+#define IMGFMT_RGB16 (IMGFMT_RGB|16)
+#define IMGFMT_RGB24 (IMGFMT_RGB|24)
+#define IMGFMT_RGB32 (IMGFMT_RGB|32)
+#define IMGFMT_RGB48LE (IMGFMT_RGB|48)
+#define IMGFMT_RGB48BE (IMGFMT_RGB|48|128)
+#define IMGFMT_RGB64LE (IMGFMT_RGB|64)
+#define IMGFMT_RGB64BE (IMGFMT_RGB|64|128)
+
+#define IMGFMT_BGR_MASK 0xFFFFFF00
+#define IMGFMT_BGR (('B'<<24)|('G'<<16)|('R'<<8))
+#define IMGFMT_BGR1 (IMGFMT_BGR|1)
+#define IMGFMT_BGR4 (IMGFMT_BGR|4)
+#define IMGFMT_BGR4_CHAR (IMGFMT_BGR|4|128) // BGR4 with 1 pixel per byte
+#define IMGFMT_BGR8 (IMGFMT_BGR|8)
+#define IMGFMT_BGR12 (IMGFMT_BGR|12)
+#define IMGFMT_BGR15 (IMGFMT_BGR|15)
+#define IMGFMT_BGR16 (IMGFMT_BGR|16)
+#define IMGFMT_BGR24 (IMGFMT_BGR|24)
+#define IMGFMT_BGR32 (IMGFMT_BGR|32)
+
+#define IMGFMT_XYZ_MASK 0xFFFFFF00
+#define IMGFMT_XYZ (('X'<<24)|('Y'<<16)|('Z'<<8))
+#define IMGFMT_XYZ12LE (IMGFMT_XYZ|12)
+#define IMGFMT_XYZ12BE (IMGFMT_XYZ|12|128)
+
+#define IMGFMT_GBR24P (('G'<<24)|('B'<<16)|('R'<<8)|24)
+#define IMGFMT_GBR12PLE (('G'<<24)|('B'<<16)|('R'<<8)|36)
+#define IMGFMT_GBR12PBE (('G'<<24)|('B'<<16)|('R'<<8)|36|128)
+#define IMGFMT_GBR14PLE (('G'<<24)|('B'<<16)|('R'<<8)|42)
+#define IMGFMT_GBR14PBE (('G'<<24)|('B'<<16)|('R'<<8)|42|128)
+
+#if HAVE_BIGENDIAN
+#define IMGFMT_ABGR IMGFMT_RGB32
+#define IMGFMT_BGRA (IMGFMT_RGB32|128)
+#define IMGFMT_ARGB IMGFMT_BGR32
+#define IMGFMT_RGBA (IMGFMT_BGR32|128)
+#define IMGFMT_RGB64NE IMGFMT_RGB64BE
+#define IMGFMT_RGB48NE IMGFMT_RGB48BE
+#define IMGFMT_RGB12BE IMGFMT_RGB12
+#define IMGFMT_RGB12LE (IMGFMT_RGB12|128)
+#define IMGFMT_RGB15BE IMGFMT_RGB15
+#define IMGFMT_RGB15LE (IMGFMT_RGB15|128)
+#define IMGFMT_RGB16BE IMGFMT_RGB16
+#define IMGFMT_RGB16LE (IMGFMT_RGB16|128)
+#define IMGFMT_BGR12BE IMGFMT_BGR12
+#define IMGFMT_BGR12LE (IMGFMT_BGR12|128)
+#define IMGFMT_BGR15BE IMGFMT_BGR15
+#define IMGFMT_BGR15LE (IMGFMT_BGR15|128)
+#define IMGFMT_BGR16BE IMGFMT_BGR16
+#define IMGFMT_BGR16LE (IMGFMT_BGR16|128)
+#define IMGFMT_XYZ12 IMGFMT_XYZ12BE
+#define IMGFMT_GBR12P IMGFMT_GBR12PBE
+#define IMGFMT_GBR14P IMGFMT_GBR14PBE
+#else
+#define IMGFMT_ABGR (IMGFMT_BGR32|128)
+#define IMGFMT_BGRA IMGFMT_BGR32
+#define IMGFMT_ARGB (IMGFMT_RGB32|128)
+#define IMGFMT_RGBA IMGFMT_RGB32
+#define IMGFMT_RGB64NE IMGFMT_RGB64LE
+#define IMGFMT_RGB48NE IMGFMT_RGB48LE
+#define IMGFMT_RGB12BE (IMGFMT_RGB12|128)
+#define IMGFMT_RGB12LE IMGFMT_RGB12
+#define IMGFMT_RGB15BE (IMGFMT_RGB15|128)
+#define IMGFMT_RGB15LE IMGFMT_RGB15
+#define IMGFMT_RGB16BE (IMGFMT_RGB16|128)
+#define IMGFMT_RGB16LE IMGFMT_RGB16
+#define IMGFMT_BGR12BE (IMGFMT_BGR12|128)
+#define IMGFMT_BGR12LE IMGFMT_BGR12
+#define IMGFMT_BGR15BE (IMGFMT_BGR15|128)
+#define IMGFMT_BGR15LE IMGFMT_BGR15
+#define IMGFMT_BGR16BE (IMGFMT_BGR16|128)
+#define IMGFMT_BGR16LE IMGFMT_BGR16
+#define IMGFMT_XYZ12 IMGFMT_XYZ12LE
+#define IMGFMT_GBR12P IMGFMT_GBR12PLE
+#define IMGFMT_GBR14P IMGFMT_GBR14PLE
+#endif
+
+/* old names for compatibility */
+#define IMGFMT_RG4B IMGFMT_RGB4_CHAR
+#define IMGFMT_BG4B IMGFMT_BGR4_CHAR
+
+#define IMGFMT_IS_RGB(fmt) (((fmt)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
+#define IMGFMT_IS_BGR(fmt) (((fmt)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
+#define IMGFMT_IS_XYZ(fmt) (((fmt)&IMGFMT_XYZ_MASK)==IMGFMT_XYZ)
+
+#define IMGFMT_RGB_DEPTH(fmt) ((fmt)&0x7F)
+#define IMGFMT_BGR_DEPTH(fmt) ((fmt)&0x7F)
+#define IMGFMT_XYZ_DEPTH(fmt) ((fmt)&0x7F)
+
+
+/* Planar YUV Formats */
+
+#define IMGFMT_YVU9 0x39555659
+#define IMGFMT_IF09 0x39304649
+#define IMGFMT_YV12 0x32315659
+#define IMGFMT_I420 0x30323449
+#define IMGFMT_IYUV 0x56555949
+#define IMGFMT_CLPL 0x4C504C43
+#define IMGFMT_Y800 0x30303859
+#define IMGFMT_Y8 0x20203859
+#define IMGFMT_NV12 0x3231564E
+#define IMGFMT_NV21 0x3132564E
+#define IMGFMT_Y16_LE 0x20363159
+
+/* unofficial Planar Formats, FIXME if official 4CC exists */
+#define IMGFMT_444P 0x50343434
+#define IMGFMT_422P 0x50323234
+#define IMGFMT_411P 0x50313134
+#define IMGFMT_440P 0x50303434
+#define IMGFMT_HM12 0x32314D48
+#define IMGFMT_Y16_BE 0x59313620
+
+// Gray with alpha
+#define IMGFMT_Y8A 0x59320008
+// 4:2:0 planar with alpha
+#define IMGFMT_420A 0x41303234
+// 4:2:2 planar with alpha
+#define IMGFMT_422A 0x41323234
+// 4:4:4 planar with alpha
+#define IMGFMT_444A 0x41343434
+
+#define IMGFMT_444P16_LE 0x51343434
+#define IMGFMT_444P16_BE 0x34343451
+#define IMGFMT_444P14_LE 0x54343434
+#define IMGFMT_444P14_BE 0x34343454
+#define IMGFMT_444P12_LE 0x55343434
+#define IMGFMT_444P12_BE 0x34343455
+#define IMGFMT_444P10_LE 0x52343434
+#define IMGFMT_444P10_BE 0x34343452
+#define IMGFMT_444P9_LE 0x53343434
+#define IMGFMT_444P9_BE 0x34343453
+#define IMGFMT_422P16_LE 0x51323234
+#define IMGFMT_422P16_BE 0x34323251
+#define IMGFMT_422P14_LE 0x54323234
+#define IMGFMT_422P14_BE 0x34323254
+#define IMGFMT_422P12_LE 0x55323234
+#define IMGFMT_422P12_BE 0x34323255
+#define IMGFMT_422P10_LE 0x52323234
+#define IMGFMT_422P10_BE 0x34323252
+#define IMGFMT_422P9_LE 0x53323234
+#define IMGFMT_422P9_BE 0x34323253
+#define IMGFMT_420P16_LE 0x51303234
+#define IMGFMT_420P16_BE 0x34323051
+#define IMGFMT_420P14_LE 0x54303234
+#define IMGFMT_420P14_BE 0x34323054
+#define IMGFMT_420P12_LE 0x55303234
+#define IMGFMT_420P12_BE 0x34323055
+#define IMGFMT_420P10_LE 0x52303234
+#define IMGFMT_420P10_BE 0x34323052
+#define IMGFMT_420P9_LE 0x53303234
+#define IMGFMT_420P9_BE 0x34323053
+#if HAVE_BIGENDIAN
+#define IMGFMT_444P16 IMGFMT_444P16_BE
+#define IMGFMT_444P14 IMGFMT_444P14_BE
+#define IMGFMT_444P12 IMGFMT_444P12_BE
+#define IMGFMT_444P10 IMGFMT_444P10_BE
+#define IMGFMT_444P9 IMGFMT_444P9_BE
+#define IMGFMT_422P16 IMGFMT_422P16_BE
+#define IMGFMT_422P14 IMGFMT_422P14_BE
+#define IMGFMT_422P12 IMGFMT_422P12_BE
+#define IMGFMT_422P10 IMGFMT_422P10_BE
+#define IMGFMT_422P9 IMGFMT_422P9_BE
+#define IMGFMT_420P16 IMGFMT_420P16_BE
+#define IMGFMT_420P14 IMGFMT_420P14_BE
+#define IMGFMT_420P12 IMGFMT_420P12_BE
+#define IMGFMT_420P10 IMGFMT_420P10_BE
+#define IMGFMT_420P9 IMGFMT_420P9_BE
+#define IMGFMT_Y16 IMGFMT_Y16_BE
+#define IMGFMT_IS_YUVP16_NE(fmt) IMGFMT_IS_YUVP16_BE(fmt)
+#else
+#define IMGFMT_444P16 IMGFMT_444P16_LE
+#define IMGFMT_444P14 IMGFMT_444P14_LE
+#define IMGFMT_444P12 IMGFMT_444P12_LE
+#define IMGFMT_444P10 IMGFMT_444P10_LE
+#define IMGFMT_444P9 IMGFMT_444P9_LE
+#define IMGFMT_422P16 IMGFMT_422P16_LE
+#define IMGFMT_422P14 IMGFMT_422P14_LE
+#define IMGFMT_422P12 IMGFMT_422P12_LE
+#define IMGFMT_422P10 IMGFMT_422P10_LE
+#define IMGFMT_422P9 IMGFMT_422P9_LE
+#define IMGFMT_420P16 IMGFMT_420P16_LE
+#define IMGFMT_420P14 IMGFMT_420P14_LE
+#define IMGFMT_420P12 IMGFMT_420P12_LE
+#define IMGFMT_420P10 IMGFMT_420P10_LE
+#define IMGFMT_420P9 IMGFMT_420P9_LE
+#define IMGFMT_Y16 IMGFMT_Y16_LE
+#define IMGFMT_IS_YUVP16_NE(fmt) IMGFMT_IS_YUVP16_LE(fmt)
+#endif
+
+#define IMGFMT_IS_YUVP16_LE(fmt) (((fmt - 0x51000034) & 0xfc0000ff) == 0)
+#define IMGFMT_IS_YUVP16_BE(fmt) (((fmt - 0x34000051) & 0xff0000fc) == 0)
+#define IMGFMT_IS_YUVP16(fmt) (IMGFMT_IS_YUVP16_LE(fmt) || IMGFMT_IS_YUVP16_BE(fmt))
+
+/**
+ * \brief Find the corresponding full 16 bit format, i.e. IMGFMT_420P10_LE -> IMGFMT_420P16_LE
+ * \return normalized format ID or 0 if none exists.
+ */
+static inline int normalize_yuvp16(int fmt) {
+    /* Depth tag = top byte of LE ids / bottom byte of BE ids; force it to 0x51
+     * (16 bit). NOTE(review): the IS_YUVP16 masks accept tags 0x51..0x54 only,
+     * so 12 bit ids (tag 0x55) are not matched and yield 0. */
+    if (IMGFMT_IS_YUVP16_LE(fmt)) return 0x51000000 | (fmt & 0x00ffffff);
+    return IMGFMT_IS_YUVP16_BE(fmt) ? 0x00000051 | (fmt & 0xffffff00) : 0;
+}
+
+/* Packed YUV Formats */
+
+#define IMGFMT_IUYV 0x56595549 // Interlaced UYVY
+#define IMGFMT_IY41 0x31435949 // Interlaced Y41P
+#define IMGFMT_IYU1 0x31555949
+#define IMGFMT_IYU2 0x32555949
+#define IMGFMT_UYVY 0x59565955
+#define IMGFMT_UYNV 0x564E5955 // Exactly same as UYVY
+#define IMGFMT_cyuv 0x76757963 // upside-down UYVY
+#define IMGFMT_Y422 0x32323459 // Exactly same as UYVY
+#define IMGFMT_YUY2 0x32595559
+#define IMGFMT_YUNV 0x564E5559 // Exactly same as YUY2
+#define IMGFMT_YVYU 0x55595659
+#define IMGFMT_Y41P 0x50313459
+#define IMGFMT_Y211 0x31313259
+#define IMGFMT_Y41T 0x54313459 // Y41P, Y lsb = transparency
+#define IMGFMT_Y42T 0x54323459 // UYVY, Y lsb = transparency
+#define IMGFMT_V422 0x32323456 // upside-down UYVY?
+#define IMGFMT_V655 0x35353656
+#define IMGFMT_CLJR 0x524A4C43
+#define IMGFMT_YUVP 0x50565559 // 10-bit YUYV
+#define IMGFMT_UYVP 0x50565955 // 10-bit UYVY
+
+/* Compressed Formats */
+#define IMGFMT_MPEGPES (('M'<<24)|('P'<<16)|('E'<<8)|('S'))
+#define IMGFMT_MJPEG (('M')|('J'<<8)|('P'<<16)|('G'<<24))
+/* Formats that are understood by zoran chips, we include
+ * non-interlaced, interlaced top-first, interlaced bottom-first */
+#define IMGFMT_ZRMJPEGNI (('Z'<<24)|('R'<<16)|('N'<<8)|('I'))
+#define IMGFMT_ZRMJPEGIT (('Z'<<24)|('R'<<16)|('I'<<8)|('T'))
+#define IMGFMT_ZRMJPEGIB (('Z'<<24)|('R'<<16)|('I'<<8)|('B'))
+
+// I think that this code could not be used by any other codec/format
+#define IMGFMT_XVMC 0x1DC70000
+#define IMGFMT_XVMC_MASK 0xFFFF0000
+#define IMGFMT_IS_XVMC(fmt) (((fmt)&IMGFMT_XVMC_MASK)==IMGFMT_XVMC)
+//these are chroma420
+#define IMGFMT_XVMC_MOCO_MPEG2 (IMGFMT_XVMC|0x02)
+#define IMGFMT_XVMC_IDCT_MPEG2 (IMGFMT_XVMC|0x82)
+
+// VDPAU specific format.
+#define IMGFMT_VDPAU 0x1DC80000
+#define IMGFMT_VDPAU_MASK 0xFFFF0000
+#define IMGFMT_IS_VDPAU(fmt) (((fmt)&IMGFMT_VDPAU_MASK)==IMGFMT_VDPAU)
+#define IMGFMT_VDPAU_MPEG1 (IMGFMT_VDPAU|0x01)
+#define IMGFMT_VDPAU_MPEG2 (IMGFMT_VDPAU|0x02)
+#define IMGFMT_VDPAU_H264 (IMGFMT_VDPAU|0x03)
+#define IMGFMT_VDPAU_WMV3 (IMGFMT_VDPAU|0x04)
+#define IMGFMT_VDPAU_VC1 (IMGFMT_VDPAU|0x05)
+#define IMGFMT_VDPAU_MPEG4 (IMGFMT_VDPAU|0x06)
+
+#define IMGFMT_IS_HWACCEL(fmt) (IMGFMT_IS_VDPAU(fmt) || IMGFMT_IS_XVMC(fmt))
+
+typedef struct {
+ void* data;
+ int size;
+ int id; // stream id. usually 0x1E0
+ int timestamp; // pts, 90000 Hz counter based
+} vo_mpegpes_t;
+
+const char *ff_vo_format_name(int format);
+
+/**
+ * Calculates the scale shifts for the chroma planes for planar YUV
+ *
+ * \param component_bits bits per component
+ * \return bits-per-pixel for format if successful (i.e. format is 3 or 4-planes planar YUV), 0 otherwise
+ */
+int ff_mp_get_chroma_shift(int format, int *x_shift, int *y_shift, int *component_bits);
+
+#endif /* MPLAYER_IMG_FORMAT_H */
diff --git a/libavfilter/libmpcodecs/libvo/fastmemcpy.h b/libavfilter/libmpcodecs/libvo/fastmemcpy.h
new file mode 100644
index 0000000..5a17d01
--- /dev/null
+++ b/libavfilter/libmpcodecs/libvo/fastmemcpy.h
@@ -0,0 +1,99 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with MPlayer; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MPLAYER_FASTMEMCPY_H
+#define MPLAYER_FASTMEMCPY_H
+
+#include <inttypes.h>
+#include <string.h>
+#include <stddef.h>
+
+void * fast_memcpy(void * to, const void * from, size_t len);
+void * mem2agpcpy(void * to, const void * from, size_t len);
+
+#if ! defined(CONFIG_FASTMEMCPY) || ! (HAVE_MMX || HAVE_MMX2 || HAVE_AMD3DNOW /* || HAVE_SSE || HAVE_SSE2 */)
+#define mem2agpcpy(a,b,c) memcpy(a,b,c)
+#define fast_memcpy(a,b,c) memcpy(a,b,c)
+#endif
+
+/* Copy height lines of bytesPerLine bytes with mem2agpcpy(); returns the dst
+ * pointer as passed in. Equal strides are copied as one contiguous run
+ * (inter-line padding included); a negative stride then starts at the
+ * lowest-addressed line. */
+static inline void * mem2agpcpy_pic(void * dst, const void * src, int bytesPerLine, int height, int dstStride, int srcStride)
+{
+    int i;
+    void *retval=dst;
+    if (height <= 0) // nothing to copy; keeps the size computed below non-negative
+        return retval;
+    if (dstStride == srcStride) {
+        if (srcStride < 0) {
+            src = (const uint8_t*)src + (height-1)*srcStride;
+            dst = (uint8_t*)dst + (height-1)*dstStride;
+            srcStride = -srcStride;
+        }
+        // skip the padding after the final line: it may lie outside the buffers
+        mem2agpcpy(dst, src, (size_t)srcStride*(height-1) + bytesPerLine);
+    } else {
+        for (i=0; i<height; i++) {
+            mem2agpcpy(dst, src, bytesPerLine);
+            src = (const uint8_t*)src + srcStride;
+            dst = (uint8_t*)dst + dstStride;
+        }
+    }
+    return retval;
+}
+
+#define memcpy_pic(d, s, b, h, ds, ss) memcpy_pic2(d, s, b, h, ds, ss, 0)
+#define my_memcpy_pic(d, s, b, h, ds, ss) memcpy_pic2(d, s, b, h, ds, ss, 1)
+
+/**
+ * Copy height lines of bytesPerLine bytes each with fast_memcpy().
+ * Equal strides are copied as one contiguous run (inter-line padding included);
+ * a negative stride then starts at the lowest-addressed line.
+ * \param limit2width always skip data between end of line and start of next
+ *                    instead of copying the full block when strides are the same
+ * \return the dst pointer as passed in
+ */
+static inline void * memcpy_pic2(void * dst, const void * src,
+                                 int bytesPerLine, int height,
+                                 int dstStride, int srcStride, int limit2width)
+{
+    int i;
+    void *retval=dst;
+    if (height <= 0) // nothing to copy; keeps the size computed below non-negative
+        return retval;
+    if (!limit2width && dstStride == srcStride) {
+        if (srcStride < 0) {
+            src = (const uint8_t*)src + (height-1)*srcStride;
+            dst = (uint8_t*)dst + (height-1)*dstStride;
+            srcStride = -srcStride;
+        }
+        // skip the padding after the final line: it may lie outside the buffers
+        fast_memcpy(dst, src, (size_t)srcStride*(height-1) + bytesPerLine);
+    } else {
+        for (i=0; i<height; i++) {
+            fast_memcpy(dst, src, bytesPerLine);
+            src = (const uint8_t*)src + srcStride;
+            dst = (uint8_t*)dst + dstStride;
+        }
+    }
+    return retval;
+}
+
+#endif /* MPLAYER_FASTMEMCPY_H */
diff --git a/libavfilter/libmpcodecs/libvo/video_out.h b/libavfilter/libmpcodecs/libvo/video_out.h
new file mode 100644
index 0000000..49d3098
--- /dev/null
+++ b/libavfilter/libmpcodecs/libvo/video_out.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright (C) Aaron Holtzman - Aug 1999
+ * Strongly modified, most parts rewritten: A'rpi/ESP-team - 2000-2001
+ * (C) MPlayer developers
+ *
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_VIDEO_OUT_H
+#define MPLAYER_VIDEO_OUT_H
+
+#include <inttypes.h>
+#include <stdarg.h>
+
+//#include "sub/font_load.h"
+#include "../img_format.h"
+//#include "vidix/vidix.h"
+
+
+/* Rotate the pair (x, y) in place: x receives the old y and y receives the
+ * negated old x. t is the type used for the temporary. x and y must be
+ * lvalues; they are expanded unparenthesized and more than once. */
+#define ROTATE(t, x, y) do { \
+ t rot_tmp = x; \
+ x = y; \
+ y = -rot_tmp; \
+} while(0)
+
+/* VO_EVENT_* flags; every value is a distinct single bit. */
+#define VO_EVENT_EXPOSE 1
+#define VO_EVENT_RESIZE 2
+#define VO_EVENT_KEYPRESS 4
+#define VO_EVENT_REINIT 8
+#define VO_EVENT_MOVE 16
+#define VO_EVENT_MOUSE 32
+
+/* VOCTRL_* request codes (presumably the `request` argument of the control()
+ * callback in vo_functions_t -- confirm). The values are not listed in
+ * numeric order and the numbering has gaps. */
+/* Obsolete: VOCTRL_QUERY_VAA 1 */
+/* does the device support the required format */
+#define VOCTRL_QUERY_FORMAT 2
+/* signal a device reset seek */
+#define VOCTRL_RESET 3
+/* true if vo driver can use GUI created windows */
+#define VOCTRL_GUISUPPORT 4
+/* used to switch to fullscreen */
+#define VOCTRL_FULLSCREEN 5
+/* signal a device pause */
+#define VOCTRL_PAUSE 7
+/* start/resume playback */
+#define VOCTRL_RESUME 8
+/* libmpcodecs direct rendering: */
+#define VOCTRL_GET_IMAGE 9
+#define VOCTRL_DRAW_IMAGE 13
+#define VOCTRL_SET_SPU_PALETTE 14
+/* decoding ahead: */
+#define VOCTRL_GET_NUM_FRAMES 10
+#define VOCTRL_GET_FRAME_NUM 11
+#define VOCTRL_SET_FRAME_NUM 12
+#define VOCTRL_GET_PANSCAN 15
+#define VOCTRL_SET_PANSCAN 16
+/* equalizer controls */
+#define VOCTRL_SET_EQUALIZER 17
+#define VOCTRL_GET_EQUALIZER 18
+/* Frame duplication */
+#define VOCTRL_DUPLICATE_FRAME 20
+// ... 21
+#define VOCTRL_START_SLICE 21
+
+#define VOCTRL_ONTOP 25
+#define VOCTRL_ROOTWIN 26
+#define VOCTRL_BORDER 27
+#define VOCTRL_DRAW_EOSD 28
+#define VOCTRL_GET_EOSD_RES 29
+
+#define VOCTRL_SET_DEINTERLACE 30
+#define VOCTRL_GET_DEINTERLACE 31
+
+#define VOCTRL_UPDATE_SCREENINFO 32
+
+// Vo can be used by xover
+#define VOCTRL_XOVERLAY_SUPPORT 22
+
+/* Request code, followed by the colorkey description declared next to it
+ * (presumably the data passed with this request -- confirm). */
+#define VOCTRL_XOVERLAY_SET_COLORKEY 24
+typedef struct {
+ uint32_t x11; // The raw x11 color
+ uint16_t r,g,b; // 16-bit red, green and blue components
+} mp_colorkey_t;
+
+/* Request code, followed by the window rectangle declared next to it
+ * (presumably the data passed with this request -- confirm). */
+#define VOCTRL_XOVERLAY_SET_WIN 23
+typedef struct {
+ int x,y; // position
+ int w,h; // size
+} mp_win_t;
+
+/* VO_* result values: 1 and 0 for true/false, negative values for the
+ * error / not-available / not-implemented cases. */
+#define VO_TRUE 1
+#define VO_FALSE 0
+#define VO_ERROR -1
+#define VO_NOTAVAIL -2
+#define VO_NOTIMPL -3
+
+/* VOFLAG_* bit flags; every value is a distinct single bit. */
+#define VOFLAG_FULLSCREEN 0x01
+#define VOFLAG_MODESWITCHING 0x02
+#define VOFLAG_SWSCALE 0x04
+#define VOFLAG_FLIPPING 0x08
+#define VOFLAG_HIDDEN 0x10 //< Use to create a hidden window
+#define VOFLAG_STEREO 0x20 //< Use to create a stereo-capable window
+#define VOFLAG_DEPTH 0x40 //< Request a depth buffer
+#define VOFLAG_XOVERLAY_SUB_VO 0x10000
+
+/* Descriptive strings for one video output driver. */
+typedef struct vo_info_s
+{
+ /* driver name ("Matrox Millennium G200/G400") */
+ const char *name;
+ /* short name (for config strings) ("mga") */
+ const char *short_name;
+ /* author ("Aaron Holtzman <aholtzma@ess.engr.uvic.ca>") */
+ const char *author;
+ /* any additional comments */
+ const char *comment;
+} vo_info_t;
+
+/* Table of callbacks, plus the info record, that make up one video output
+ * driver. */
+typedef struct vo_functions_s
+{
+ const vo_info_t *info;
+ /*
+ * Preinitializes driver (real INITIALIZATION)
+ * arg - currently it's vo_subdevice
+ * returns: zero on successful initialization, non-zero on error.
+ */
+ int (*preinit)(const char *arg);
+ /*
+ * Initialize (means CONFIGURE) the display driver.
+ * params:
+ * width,height: image source size
+ * d_width,d_height: size of the requested window size, just a hint
+ * fullscreen: flag, 0=windowed 1=fullscreen, just a hint
+ * title: window title, if available
+ * format: fourcc of pixel format
+ * returns : zero on successful initialization, non-zero on error.
+ */
+ int (*config)(uint32_t width, uint32_t height, uint32_t d_width,
+ uint32_t d_height, uint32_t fullscreen, char *title,
+ uint32_t format);
+
+ /*
+ * Control interface
+ */
+ int (*control)(uint32_t request, void *data, ...);
+
+ /*
+ * Display a new RGB/BGR frame of the video to the screen.
+ * params:
+ * src[0] - pointer to the image
+ */
+ int (*draw_frame)(uint8_t *src[]);
+
+ /*
+ * Draw a planar YUV slice to the buffer:
+ * params:
+ * src[3] = source image planes (Y,U,V)
+ * stride[3] = source image planes line widths (in bytes)
+ * w,h = width*height of area to be copied (in Y pixels)
+ * x,y = position at the destination image (in Y pixels)
+ */
+ int (*draw_slice)(uint8_t *src[], int stride[], int w,int h, int x,int y);
+
+ /*
+ * Draws OSD to the screen buffer
+ */
+ void (*draw_osd)(void);
+
+ /*
+ * Blit/Flip buffer to the screen. Must be called after each frame!
+ */
+ void (*flip_page)(void);
+
+ /*
+ * This func is called after every frame to handle keyboard and
+ * other events. It's called in PAUSE mode too!
+ */
+ void (*check_events)(void);
+
+ /*
+ * Closes driver. Should restore the original state of the system.
+ */
+ void (*uninit)(void);
+} vo_functions_t;
+
+/* Driver selection / configuration entry points. Only the prototypes are
+ * visible in this header. */
+const vo_functions_t* init_best_video_out(char** vo_list);
+int config_video_out(const vo_functions_t *vo, uint32_t width, uint32_t height,
+ uint32_t d_width, uint32_t d_height, uint32_t flags,
+ char *title, uint32_t format);
+void list_video_out(void);
+
+// NULL terminated array of all drivers
+extern const vo_functions_t* const video_out_drivers[];
+
+/* Everything below declares global state with external linkage; none of it
+ * is defined in this header. */
+extern int vo_flags;
+
+extern int vo_config_count;
+
+extern int xinerama_screen;
+extern int xinerama_x;
+extern int xinerama_y;
+
+// correct resolution/bpp on screen: (should be autodetected by vo_init())
+extern int vo_depthonscreen;
+extern int vo_screenwidth;
+extern int vo_screenheight;
+
+// requested resolution/bpp: (-x -y -bpp options)
+extern int vo_dx;
+extern int vo_dy;
+extern int vo_dwidth;
+extern int vo_dheight;
+extern int vo_dbpp;
+
+extern int vo_grabpointer;
+extern int vo_doublebuffering;
+extern int vo_directrendering;
+extern int vo_vsync;
+extern int vo_fsmode;
+extern float vo_panscan;
+extern float vo_border_pos_x;
+extern float vo_border_pos_y;
+extern int vo_rotate;
+extern int vo_adapter_num;
+extern int vo_refresh_rate;
+extern int vo_keepaspect;
+extern int vo_rootwin;
+extern int vo_ontop;
+extern int vo_border;
+
+extern int vo_gamma_gamma;
+extern int vo_gamma_brightness;
+extern int vo_gamma_saturation;
+extern int vo_gamma_contrast;
+extern int vo_gamma_hue;
+extern int vo_gamma_red_intensity;
+extern int vo_gamma_green_intensity;
+extern int vo_gamma_blue_intensity;
+
+extern int vo_nomouse_input;
+extern int enable_mouse_movements;
+
+extern int vo_pts;
+extern float vo_fps;
+
+extern char *vo_subdevice;
+
+extern int vo_colorkey;
+
+extern char *vo_winname;
+extern char *vo_wintitle;
+
+extern int64_t WinID;
+
+/* A pair of float bounds (min, max). */
+typedef struct {
+ float min;
+ float max;
+ } range_t;
+
+/* Helpers operating on range_t; only the prototypes are visible here. */
+float range_max(range_t *r);
+int in_range(range_t *r, float f);
+range_t *str2range(char *s);
+/* Monitor frequency / dotclock settings kept as strings. */
+extern char *monitor_hfreq_str;
+extern char *monitor_vfreq_str;
+extern char *monitor_dotclock_str;
+
+/* One key translation entry: maps the code `from` to the code `to`. */
+struct mp_keymap {
+ int from;
+ int to;
+};
+/* Looks up key in a table of mp_keymap entries (implementation not visible
+ * here; the table terminator convention must be checked at the definition). */
+int lookup_keymap_table(const struct mp_keymap *map, int key);
+/* Rectangle stored both as its four edges and as its width/height. */
+struct vo_rect {
+ int left, right, top, bottom, width, height;
+};
+/* src, dst and borders are non-const (presumably outputs -- confirm at the
+ * definition); crop is const and therefore input only. */
+void calc_src_dst_rects(int src_width, int src_height, struct vo_rect *src, struct vo_rect *dst,
+ struct vo_rect *borders, const struct vo_rect *crop);
+void vo_mouse_movement(int posx, int posy);
+
+/**
+ * Compute an integer offset from a relative position.
+ *
+ * For pos within [0, 1] the result is pos*(full - part). For pos below 0 it
+ * is pos*part, and for pos above 1 it is (full - part) + (pos - 1)*part.
+ * The floating-point result is converted to int on return.
+ */
+static inline int apply_border_pos(int full, int part, float pos) {
+    if (pos < 0)
+        return pos * part;
+
+    return pos <= 1.0 ? pos*(full - part)
+                      : full - part + (pos - 1) * part;
+}
+
+#endif /* MPLAYER_VIDEO_OUT_H */
diff --git a/libavfilter/libmpcodecs/mp_image.c b/libavfilter/libmpcodecs/mp_image.c
new file mode 100644
index 0000000..0e4d6d7
--- /dev/null
+++ b/libavfilter/libmpcodecs/mp_image.c
@@ -0,0 +1,257 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+#include "img_format.h"
+#include "mp_image.h"
+
+#include "libvo/fastmemcpy.h"
+//#include "libavutil/mem.h"
+#include "libavutil/imgutils.h"
+
+/**
+ * Allocate the pixel buffer of an image and fill in planes[] and stride[].
+ *
+ * The image must already carry its dimensions and format (imgfmt, bpp,
+ * flags, num_planes, chroma_width/chroma_height are read here). All planes of
+ * a planar image live in one buffer that starts at planes[0]. When
+ * avpriv_set_systematic_pal2() accepts the format, MP_IMGFLAG_RGB_PALETTE is
+ * set and, for non-planar images, a separate 1024 byte palette is allocated
+ * in planes[1].
+ *
+ * On success MP_IMGFLAG_ALLOCATED is set. If an allocation fails the function
+ * returns with planes[0] == NULL and MP_IMGFLAG_ALLOCATED left unset, so
+ * callers can detect the failure by checking planes[0].
+ */
+void ff_mp_image_alloc_planes(mp_image_t *mpi) {
+    uint32_t temp[256];
+    if (avpriv_set_systematic_pal2(temp, ff_mp2ff_pix_fmt(mpi->imgfmt)) >= 0)
+        mpi->flags |= MP_IMGFLAG_RGB_PALETTE;
+
+    // IF09 - allocate space for 4. plane delta info - unused
+    if (mpi->imgfmt == IMGFMT_IF09) {
+        mpi->planes[0]=av_malloc(mpi->bpp*mpi->width*(mpi->height+2)/8+
+                                 mpi->chroma_width*mpi->chroma_height);
+    } else
+        mpi->planes[0]=av_malloc(mpi->bpp*mpi->width*(mpi->height+2)/8);
+    // do not derive plane pointers from a failed allocation
+    if (!mpi->planes[0])
+        return;
+    if (mpi->flags&MP_IMGFLAG_PLANAR) {
+        int bpp = IMGFMT_IS_YUVP16(mpi->imgfmt)? 2 : 1;
+        // YV12/I420/YVU9/IF09. feel free to add other planar formats here...
+        mpi->stride[0]=mpi->stride[3]=bpp*mpi->width;
+        if(mpi->num_planes > 2){
+            mpi->stride[1]=mpi->stride[2]=bpp*mpi->chroma_width;
+            if(mpi->flags&MP_IMGFLAG_SWAPPED){
+                // I420/IYUV (Y,U,V)
+                mpi->planes[1]=mpi->planes[0]+mpi->stride[0]*mpi->height;
+                mpi->planes[2]=mpi->planes[1]+mpi->stride[1]*mpi->chroma_height;
+                if (mpi->num_planes > 3)
+                    mpi->planes[3]=mpi->planes[2]+mpi->stride[2]*mpi->chroma_height;
+            } else {
+                // YV12,YVU9,IF09 (Y,V,U)
+                mpi->planes[2]=mpi->planes[0]+mpi->stride[0]*mpi->height;
+                mpi->planes[1]=mpi->planes[2]+mpi->stride[1]*mpi->chroma_height;
+                if (mpi->num_planes > 3)
+                    mpi->planes[3]=mpi->planes[1]+mpi->stride[1]*mpi->chroma_height;
+            }
+        } else {
+            // NV12/NV21
+            mpi->stride[1]=mpi->chroma_width;
+            mpi->planes[1]=mpi->planes[0]+mpi->stride[0]*mpi->height;
+        }
+    } else {
+        mpi->stride[0]=mpi->width*mpi->bpp/8;
+        if (mpi->flags & MP_IMGFLAG_RGB_PALETTE) {
+            mpi->planes[1] = av_malloc(1024);
+            if (!mpi->planes[1]) {
+                // palette allocation failed: release the pixel buffer again
+                // instead of copying into a NULL pointer
+                av_free(mpi->planes[0]);
+                mpi->planes[0] = NULL;
+                return;
+            }
+            memcpy(mpi->planes[1], temp, 1024);
+        }
+    }
+    mpi->flags|=MP_IMGFLAG_ALLOCATED;
+}
+
+/**
+ * Create an image of w x h pixels in format fmt and allocate its planes.
+ *
+ * \return the new image, or NULL if the image structure itself could not be
+ *         allocated. ff_mp_image_alloc_planes() returns void, so callers
+ *         should check planes[0] of the result to detect a failed buffer
+ *         allocation.
+ */
+mp_image_t* ff_alloc_mpi(int w, int h, unsigned long int fmt) {
+    mp_image_t* mpi = ff_new_mp_image(w,h);
+
+    // ff_new_mp_image() returns NULL when malloc() fails
+    if (!mpi)
+        return NULL;
+
+    ff_mp_image_setfmt(mpi,fmt);
+    ff_mp_image_alloc_planes(mpi);
+
+    return mpi;
+}
+
+/**
+ * Copy the visible pixels of mpi into dmpi with memcpy_pic().
+ *
+ * Packed images copy mpi->w*(dmpi->bpp/8) bytes per line from planes[0].
+ * Planar images copy mpi->w bytes per line for planes[0] and
+ * chroma_width x chroma_height for planes[1] and planes[2].
+ */
+void ff_copy_mpi(mp_image_t *dmpi, mp_image_t *mpi) {
+    int plane;
+
+    if (!(mpi->flags&MP_IMGFLAG_PLANAR)) {
+        // packed: a single plane
+        memcpy_pic(dmpi->planes[0], mpi->planes[0],
+                   mpi->w*(dmpi->bpp/8), mpi->h,
+                   dmpi->stride[0], mpi->stride[0]);
+        return;
+    }
+
+    // planar: first plane at full size, then the two chroma planes
+    memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h,
+               dmpi->stride[0], mpi->stride[0]);
+    for (plane = 1; plane <= 2; plane++)
+        memcpy_pic(dmpi->planes[plane], mpi->planes[plane],
+                   mpi->chroma_width, mpi->chroma_height,
+                   dmpi->stride[plane], mpi->stride[plane]);
+}
+
+/**
+ * Set the pixel format of an image and derive the format dependent fields.
+ *
+ * Clears MP_IMGFLAG_PLANAR/YUV/SWAPPED, stores out_fmt in imgfmt and then
+ * fills in bpp, num_planes, the color flags and, for subsampled formats, the
+ * chroma shift and chroma size (computed from the current width/height).
+ * Compressed and hwaccel formats only get bpp=0. Formats that match nothing
+ * log a warning and also end up with bpp=0.
+ *
+ * The switch below relies on intentional case fall-through.
+ */
+void ff_mp_image_setfmt(mp_image_t* mpi,unsigned int out_fmt){
+ mpi->flags&=~(MP_IMGFLAG_PLANAR|MP_IMGFLAG_YUV|MP_IMGFLAG_SWAPPED);
+ mpi->imgfmt=out_fmt;
+ // compressed formats
+ if(out_fmt == IMGFMT_MPEGPES ||
+ out_fmt == IMGFMT_ZRMJPEGNI || out_fmt == IMGFMT_ZRMJPEGIT || out_fmt == IMGFMT_ZRMJPEGIB ||
+ IMGFMT_IS_HWACCEL(out_fmt)){
+ mpi->bpp=0;
+ return;
+ }
+ mpi->num_planes=1;
+ // packed RGB: depths below 8 keep their depth as bpp unless bit 128 of the
+ // format is set, everything else is rounded up to a multiple of 8
+ if (IMGFMT_IS_RGB(out_fmt)) {
+ if (IMGFMT_RGB_DEPTH(out_fmt) < 8 && !(out_fmt&128))
+ mpi->bpp = IMGFMT_RGB_DEPTH(out_fmt);
+ else
+ mpi->bpp=(IMGFMT_RGB_DEPTH(out_fmt)+7)&(~7);
+ return;
+ }
+ // packed BGR: same as RGB, additionally marked as swapped
+ if (IMGFMT_IS_BGR(out_fmt)) {
+ if (IMGFMT_BGR_DEPTH(out_fmt) < 8 && !(out_fmt&128))
+ mpi->bpp = IMGFMT_BGR_DEPTH(out_fmt);
+ else
+ mpi->bpp=(IMGFMT_BGR_DEPTH(out_fmt)+7)&(~7);
+ mpi->flags|=MP_IMGFLAG_SWAPPED;
+ return;
+ }
+ // XYZ: three components, each rounded up to a multiple of 8 bits
+ if (IMGFMT_IS_XYZ(out_fmt)) {
+ mpi->bpp=3*((IMGFMT_XYZ_DEPTH(out_fmt) + 7) & ~7);
+ return;
+ }
+ mpi->num_planes=3;
+ // planar GBR: planar but not flagged as YUV
+ if (out_fmt == IMGFMT_GBR24P) {
+ mpi->bpp=24;
+ mpi->flags|=MP_IMGFLAG_PLANAR;
+ return;
+ } else if (out_fmt == IMGFMT_GBR12P) {
+ mpi->bpp=36;
+ mpi->flags|=MP_IMGFLAG_PLANAR;
+ return;
+ } else if (out_fmt == IMGFMT_GBR14P) {
+ mpi->bpp=42;
+ mpi->flags|=MP_IMGFLAG_PLANAR;
+ return;
+ }
+ // everything that reaches this point is treated as YUV
+ mpi->flags|=MP_IMGFLAG_YUV;
+ // a non-zero result of ff_mp_get_chroma_shift() is used as bpp and marks
+ // the format as planar; the second call also fetches the chroma shifts
+ if (ff_mp_get_chroma_shift(out_fmt, NULL, NULL, NULL)) {
+ mpi->flags|=MP_IMGFLAG_PLANAR;
+ mpi->bpp = ff_mp_get_chroma_shift(out_fmt, &mpi->chroma_x_shift, &mpi->chroma_y_shift, NULL);
+ mpi->chroma_width = mpi->width >> mpi->chroma_x_shift;
+ mpi->chroma_height = mpi->height >> mpi->chroma_y_shift;
+ }
+ switch(out_fmt){
+ case IMGFMT_I420:
+ case IMGFMT_IYUV:
+ mpi->flags|=MP_IMGFLAG_SWAPPED;
+ /* fall through */
+ case IMGFMT_YV12:
+ return;
+ case IMGFMT_420A:
+ case IMGFMT_422A:
+ case IMGFMT_444A:
+ case IMGFMT_IF09:
+ mpi->num_planes=4;
+ /* fall through */
+ case IMGFMT_YVU9:
+ case IMGFMT_444P:
+ case IMGFMT_422P:
+ case IMGFMT_411P:
+ case IMGFMT_440P:
+ case IMGFMT_444P16_LE:
+ case IMGFMT_444P16_BE:
+ case IMGFMT_444P14_LE:
+ case IMGFMT_444P14_BE:
+ case IMGFMT_444P12_LE:
+ case IMGFMT_444P12_BE:
+ case IMGFMT_444P10_LE:
+ case IMGFMT_444P10_BE:
+ case IMGFMT_444P9_LE:
+ case IMGFMT_444P9_BE:
+ case IMGFMT_422P16_LE:
+ case IMGFMT_422P16_BE:
+ case IMGFMT_422P14_LE:
+ case IMGFMT_422P14_BE:
+ case IMGFMT_422P12_LE:
+ case IMGFMT_422P12_BE:
+ case IMGFMT_422P10_LE:
+ case IMGFMT_422P10_BE:
+ case IMGFMT_422P9_LE:
+ case IMGFMT_422P9_BE:
+ case IMGFMT_420P16_LE:
+ case IMGFMT_420P16_BE:
+ case IMGFMT_420P14_LE:
+ case IMGFMT_420P14_BE:
+ case IMGFMT_420P12_LE:
+ case IMGFMT_420P12_BE:
+ case IMGFMT_420P10_LE:
+ case IMGFMT_420P10_BE:
+ case IMGFMT_420P9_LE:
+ case IMGFMT_420P9_BE:
+ return;
+ case IMGFMT_Y16_LE:
+ case IMGFMT_Y16_BE:
+ mpi->bpp=16;
+ /* fall through */
+ case IMGFMT_Y800:
+ case IMGFMT_Y8:
+ /* they're planar ones, but for easier handling use them as packed */
+ mpi->flags&=~MP_IMGFLAG_PLANAR;
+ mpi->num_planes=1;
+ return;
+ case IMGFMT_Y8A:
+ mpi->num_planes=2;
+ return;
+ case IMGFMT_UYVY:
+ mpi->flags|=MP_IMGFLAG_SWAPPED;
+ /* fall through */
+ case IMGFMT_YUY2:
+ mpi->chroma_x_shift = 1;
+ mpi->bpp=16;
+ mpi->num_planes=1;
+ return;
+ case IMGFMT_NV12:
+ mpi->flags|=MP_IMGFLAG_SWAPPED;
+ /* fall through */
+ case IMGFMT_NV21:
+ mpi->flags|=MP_IMGFLAG_PLANAR;
+ mpi->bpp=12;
+ mpi->num_planes=2;
+ mpi->chroma_width=(mpi->width>>0);
+ mpi->chroma_height=(mpi->height>>1);
+ mpi->chroma_x_shift=0;
+ mpi->chroma_y_shift=1;
+ return;
+ }
+ // no case matched: warn and mark the image as having no usable format
+ ff_mp_msg(MSGT_DECVIDEO,MSGL_WARN,"mp_image: unknown out_fmt: 0x%X\n",out_fmt);
+ mpi->bpp=0;
+}
+
+/**
+ * Allocate a zero-initialized image descriptor with both the stored
+ * (width/height) and the visible (w/h) size set to w x h. No pixel buffer is
+ * allocated.
+ *
+ * \return the new descriptor, or NULL if malloc() failed
+ */
+mp_image_t* ff_new_mp_image(int w,int h){
+    mp_image_t *img = malloc(sizeof(mp_image_t));
+
+    if (img) {
+        memset(img, 0, sizeof(mp_image_t));
+        img->w      = w;
+        img->width  = w;
+        img->h      = h;
+        img->height = h;
+    }
+    return img;
+}
+
+/**
+ * Release an image descriptor; NULL is accepted and ignored.
+ *
+ * The pixel buffer is released only when MP_IMGFLAG_ALLOCATED is set. In that
+ * case planes[0] is freed (it holds the whole image) and, when
+ * MP_IMGFLAG_RGB_PALETTE is set as well, the palette in planes[1] too.
+ */
+void ff_free_mp_image(mp_image_t* mpi){
+    if (mpi) {
+        if (mpi->flags & MP_IMGFLAG_ALLOCATED) {
+            if (mpi->flags & MP_IMGFLAG_RGB_PALETTE)
+                av_free(mpi->planes[1]);
+            /* all planes were allocated as one block */
+            av_free(mpi->planes[0]);
+        }
+        free(mpi);
+    }
+}
+
diff --git a/libavfilter/libmpcodecs/mp_image.h b/libavfilter/libmpcodecs/mp_image.h
new file mode 100644
index 0000000..aedf451
--- /dev/null
+++ b/libavfilter/libmpcodecs/mp_image.h
@@ -0,0 +1,159 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_MP_IMAGE_H
+#define MPLAYER_MP_IMAGE_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#undef printf //FIXME
+#undef fprintf //FIXME
+#include "mp_msg.h"
+#include "libavutil/avutil.h"
+#include "libavutil/avassert.h"
+#undef realloc
+#undef malloc
+#undef free
+#undef rand
+#undef srand
+#undef printf
+#undef strncpy
+/* Expands to the string ".p2align <ZEROBITS>\n\t" (the argument is
+ * stringified), for use inside inline assembly templates. */
+#define ASMALIGN(ZEROBITS) ".p2align " #ZEROBITS "\n\t"
+#define CODEC_FLAG2_MEMC_ONLY 0x00001000 ///< Only do ME/MC (I frames -> ref, P frame -> ME+MC).
+
+/* Maps an MPlayer image format value to an AVPixelFormat (implementation not
+ * visible here). */
+enum AVPixelFormat ff_mp2ff_pix_fmt(int mp);
+
+//--------- codec's requirements (filled by the codec/vf) ---------
+
+//--- buffer content restrictions:
+// set if buffer content shouldn't be modified:
+#define MP_IMGFLAG_PRESERVE 0x01
+// set if buffer content will be READ.
+// This can be e.g. for next frame's MC: (I/P mpeg frames) -
+// then in combination with MP_IMGFLAG_PRESERVE - or it
+// can be because a video filter or codec will read a significant
+// amount of data while processing that frame (e.g. blending something
+// onto the frame, MV based intra prediction).
+// A frame marked like this should not be placed into uncacheable
+// video RAM for example.
+#define MP_IMGFLAG_READABLE 0x02
+
+//--- buffer width/stride/plane restrictions: (used for direct rendering)
+// stride _has_to_ be aligned to MB boundary: [for DR restrictions]
+#define MP_IMGFLAG_ACCEPT_ALIGNED_STRIDE 0x4
+// stride should be aligned to MB boundary: [for buffer allocation]
+#define MP_IMGFLAG_PREFER_ALIGNED_STRIDE 0x8
+// codec accepts any stride (>=width):
+#define MP_IMGFLAG_ACCEPT_STRIDE 0x10
+// codec accepts any width (width*bpp=stride -> stride%bpp==0) (>=width):
+#define MP_IMGFLAG_ACCEPT_WIDTH 0x20
+//--- for planar formats only:
+// uses only stride[0], and stride[1]=stride[2]=stride[0]>>mpi->chroma_x_shift
+#define MP_IMGFLAG_COMMON_STRIDE 0x40
+// uses only planes[0], and calculates planes[1,2] from width,height,imgfmt
+#define MP_IMGFLAG_COMMON_PLANE 0x80
+
+// mask covering all restriction flags above (bits 0x01..0x80)
+#define MP_IMGFLAGMASK_RESTRICTIONS 0xFF
+
+//--------- color info (filled by ff_mp_image_setfmt() ) -----------
+// set if number of planes > 1
+#define MP_IMGFLAG_PLANAR 0x100
+// set if it's YUV colorspace
+#define MP_IMGFLAG_YUV 0x200
+// set if it's swapped (BGR or YVU) plane/byteorder
+#define MP_IMGFLAG_SWAPPED 0x400
+// set if you want memory for palette allocated and managed by ff_vf_get_image etc.
+#define MP_IMGFLAG_RGB_PALETTE 0x800
+
+// mask covering all color info flags above (bits 0x100..0x800)
+#define MP_IMGFLAGMASK_COLORS 0xF00
+
+// codec uses drawing/rendering callbacks (draw_slice()-like thing, DR method 2)
+// [the codec will set this flag if it supports callbacks, and the vo _may_
+// clear it in get_image() if draw_slice() not implemented]
+#define MP_IMGFLAG_DRAW_CALLBACK 0x1000
+// set if it's in video buffer/memory: [set by vo/vf's get_image() !!!]
+#define MP_IMGFLAG_DIRECT 0x2000
+// set if buffer is allocated (used in destination images):
+#define MP_IMGFLAG_ALLOCATED 0x4000
+
+// buffer type was printed (do NOT set this flag - it's for INTERNAL USE!!!)
+#define MP_IMGFLAG_TYPE_DISPLAYED 0x8000
+
+// codec doesn't support any form of direct rendering - it has own buffer
+// allocation. so we just export its buffer pointers:
+#define MP_IMGTYPE_EXPORT 0
+// codec requires a static WO buffer, but it does only partial updates later:
+#define MP_IMGTYPE_STATIC 1
+// codec just needs some WO memory, where it writes/copies the whole frame to:
+#define MP_IMGTYPE_TEMP 2
+// I+P type, requires 2+ independent static R/W buffers
+#define MP_IMGTYPE_IP 3
+// I+P+B type, requires 2+ independent static R/W and 1+ temp WO buffers
+#define MP_IMGTYPE_IPB 4
+// Upper 16 bits give desired buffer number, -1 means get next available
+#define MP_IMGTYPE_NUMBERED 5
+// Doesn't need any buffer, incomplete image (probably a first field only)
+// we need this type to be able to differentiate between half frames and
+// all other cases
+#define MP_IMGTYPE_INCOMPLETE 6
+
+// size of the planes[] and stride[] arrays in mp_image_t
+#define MP_MAX_PLANES 4
+
+// MP_IMGFIELD_* bit flags (presumably stored in mp_image_t.fields -- confirm)
+#define MP_IMGFIELD_ORDERED 0x01
+#define MP_IMGFIELD_TOP_FIRST 0x02
+#define MP_IMGFIELD_REPEAT_FIRST 0x04
+#define MP_IMGFIELD_TOP 0x08
+#define MP_IMGFIELD_BOTTOM 0x10
+#define MP_IMGFIELD_INTERLACED 0x20
+
+/* Image descriptor: format, dimensions, per-plane pointers and strides plus
+ * bookkeeping fields. */
+typedef struct mp_image {
+ unsigned int flags; // MP_IMGFLAG_* bits
+ unsigned char type; // presumably one of MP_IMGTYPE_* -- confirm
+ int number;
+ unsigned char bpp; // bits/pixel. NOT depth! for RGB it will be n*8
+ unsigned int imgfmt;
+ int width,height; // stored dimensions
+ int x,y,w,h; // visible dimensions
+ unsigned char* planes[MP_MAX_PLANES];
+ int stride[MP_MAX_PLANES];
+ char * qscale;
+ int qstride;
+ int pict_type; // 0->unknown, 1->I, 2->P, 3->B
+ int fields;
+ int qscale_type; // 0->mpeg1/4/h263, 1->mpeg2
+ int num_planes;
+ /* these are only used by planar formats Y,U(Cb),V(Cr) */
+ int chroma_width;
+ int chroma_height;
+ int chroma_x_shift; // horizontal
+ int chroma_y_shift; // vertical
+ int usage_count;
+ /* for private use by filter or vo driver (to store buffer id or dmpi) */
+ void* priv;
+} mp_image_t;
+
+/* Descriptor handling: set the format dependent fields, allocate a zeroed
+ * descriptor (NULL on failure), release a descriptor. */
+void ff_mp_image_setfmt(mp_image_t* mpi,unsigned int out_fmt);
+mp_image_t* ff_new_mp_image(int w,int h);
+void ff_free_mp_image(mp_image_t* mpi);
+
+/* Pixel buffer handling: create descriptor plus planes in one call, allocate
+ * the planes of an existing descriptor, copy pixels from mpi into dmpi. */
+mp_image_t* ff_alloc_mpi(int w, int h, unsigned long int fmt);
+void ff_mp_image_alloc_planes(mp_image_t *mpi);
+void ff_copy_mpi(mp_image_t *dmpi, mp_image_t *mpi);
+
+#endif /* MPLAYER_MP_IMAGE_H */
diff --git a/libavfilter/libmpcodecs/mp_msg.h b/libavfilter/libmpcodecs/mp_msg.h
new file mode 100644
index 0000000..51cdff3
--- /dev/null
+++ b/libavfilter/libmpcodecs/mp_msg.h
@@ -0,0 +1,166 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_MP_MSG_H
+#define MPLAYER_MP_MSG_H
+
+#include <stdarg.h>
+
+// defined in mplayer.c and mencoder.c
+extern int verbose;
+
+// verbosity level:
+
+/* Only messages level MSGL_FATAL-MSGL_STATUS should be translated,
+ * messages level MSGL_V and above should not be translated. */
+
+#define MSGL_FATAL 0 // will exit/abort
+#define MSGL_ERR 1 // continues
+#define MSGL_WARN 2 // only warning
+#define MSGL_HINT 3 // short help message
+#define MSGL_INFO 4 // -quiet
+#define MSGL_STATUS 5 // v=0
+#define MSGL_V 6 // v=1
+#define MSGL_DBG2 7 // v=2
+#define MSGL_DBG3 8 // v=3
+#define MSGL_DBG4 9 // v=4
+#define MSGL_DBG5 10 // v=5
+
+#define MSGL_FIXME 1 // for conversions from printf where the appropriate MSGL is not known; set equal to ERR for obtrusiveness
+#define MSGT_FIXME 0 // for conversions from printf where the appropriate MSGT is not known; set equal to GLOBAL for obtrusiveness
+
+// code/module:
+
+#define MSGT_GLOBAL 0 // common player stuff errors
+#define MSGT_CPLAYER 1 // console player (mplayer.c)
+#define MSGT_GPLAYER 2 // gui player
+
+#define MSGT_VO 3 // libvo
+#define MSGT_AO 4 // libao
+
+#define MSGT_DEMUXER 5 // demuxer.c (general stuff)
+#define MSGT_DS 6 // demux stream (add/read packet etc)
+#define MSGT_DEMUX 7 // fileformat-specific stuff (demux_*.c)
+#define MSGT_HEADER 8 // fileformat-specific header (*header.c)
+
+#define MSGT_AVSYNC 9 // mplayer.c timer stuff
+#define MSGT_AUTOQ 10 // mplayer.c auto-quality stuff
+
+#define MSGT_CFGPARSER 11 // cfgparser.c
+
+#define MSGT_DECAUDIO 12 // av decoder
+#define MSGT_DECVIDEO 13
+
+#define MSGT_SEEK 14 // seeking code
+#define MSGT_WIN32 15 // win32 dll stuff
+#define MSGT_OPEN 16 // open.c (stream opening)
+#define MSGT_DVD 17 // open.c (DVD init/read/seek)
+
+#define MSGT_PARSEES 18 // parse_es.c (mpeg stream parser)
+#define MSGT_LIRC 19 // lirc_mp.c and input lirc driver
+
+#define MSGT_STREAM 20 // stream.c
+#define MSGT_CACHE 21 // cache2.c
+
+#define MSGT_MENCODER 22
+
+#define MSGT_XACODEC 23 // XAnim codecs
+
+#define MSGT_TV 24 // TV input subsystem
+
+#define MSGT_OSDEP 25 // OS-dependent parts
+
+#define MSGT_SPUDEC 26 // spudec.c
+
+#define MSGT_PLAYTREE 27 // Playtree handling (playtree.c, playtreeparser.c)
+
+#define MSGT_INPUT 28
+
+#define MSGT_VFILTER 29
+
+#define MSGT_OSD 30
+
+#define MSGT_NETWORK 31
+
+#define MSGT_CPUDETECT 32
+
+#define MSGT_CODECCFG 33
+
+#define MSGT_SWS 34
+
+#define MSGT_VOBSUB 35
+#define MSGT_SUBREADER 36
+
+#define MSGT_AFILTER 37 // Audio filter messages
+
+#define MSGT_NETST 38 // Netstream
+
+#define MSGT_MUXER 39 // muxer layer
+
+#define MSGT_OSD_MENU 40
+
+#define MSGT_IDENTIFY 41 // -identify output
+
+#define MSGT_RADIO 42
+
+#define MSGT_ASS 43 // libass messages
+
+#define MSGT_LOADER 44 // dll loader messages
+
+#define MSGT_STATUSLINE 45 // playback/encoding status line
+
+#define MSGT_TELETEXT 46 // Teletext decoder
+
+// upper bound for module ids; larger than the highest id defined above
+#define MSGT_MAX 64
+
+
+/* Message subsystem settings; declared here, defined elsewhere. */
+extern char *ff_mp_msg_charset;
+extern int ff_mp_msg_color;
+extern int ff_mp_msg_module;
+
+/* One level per module id (array of MSGT_MAX entries) plus a global level. */
+extern int ff_mp_msg_levels[MSGT_MAX];
+extern int ff_mp_msg_level_all;
+
+
+void ff_mp_msg_init(void);
+/* Takes a module id and a level; the meaning of the result must be checked
+ * at the definition. */
+int ff_mp_msg_test(int mod, int lev);
+
+#include "config.h"
+
+/* printf-style message output for module mod at level lev; the _va variant
+ * takes an already started va_list. */
+void ff_mp_msg_va(int mod, int lev, const char *format, va_list va);
+#ifdef __GNUC__
+/* GNU C: let the compiler check the format string against the arguments. */
+void ff_mp_msg(int mod, int lev, const char *format, ... ) __attribute__ ((format (printf, 3, 4)));
+# ifdef MP_DEBUG
+# define mp_dbg(mod,lev, args... ) ff_mp_msg(mod, lev, ## args )
+# else
+ // only useful for developers, disable but check syntax
+# define mp_dbg(mod,lev, args... ) do { if (0) ff_mp_msg(mod, lev, ## args ); } while (0)
+# endif
+#else // not GNU C
+void ff_mp_msg(int mod, int lev, const char *format, ... );
+# ifdef MP_DEBUG
+# define mp_dbg(mod,lev, ... ) ff_mp_msg(mod, lev, __VA_ARGS__)
+# else
+ // only useful for developers, disable but check syntax
+# define mp_dbg(mod,lev, ... ) do { if (0) ff_mp_msg(mod, lev, __VA_ARGS__); } while (0)
+# endif
+#endif /* __GNUC__ */
+
+const char* ff_filename_recode(const char* filename);
+
+#endif /* MPLAYER_MP_MSG_H */
diff --git a/libavfilter/libmpcodecs/mpc_info.h b/libavfilter/libmpcodecs/mpc_info.h
new file mode 100644
index 0000000..8554699
--- /dev/null
+++ b/libavfilter/libmpcodecs/mpc_info.h
@@ -0,0 +1,43 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_MPC_INFO_H
+#define MPLAYER_MPC_INFO_H
+
+/* Descriptive strings for one codec. */
+typedef struct mp_codec_info_s
+{
+ /* codec long name ("Autodesk FLI/FLC Animation decoder") */
+ const char *name;
+ /* short name (same as driver name in codecs.conf) ("dshow") */
+ const char *short_name;
+ /* interface author/maintainer */
+ const char *maintainer;
+ /* codec author ("Aaron Holtzman <aholtzma@ess.engr.uvic.ca>") */
+ const char *author;
+ /* any additional comments */
+ const char *comment;
+} mp_codec_info_t;
+
+/* CONTROL_* result values. CONTROL_OK and CONTROL_TRUE share the value 1;
+ * the unknown / error / not-available cases are negative. */
+#define CONTROL_OK 1
+#define CONTROL_TRUE 1
+#define CONTROL_FALSE 0
+#define CONTROL_UNKNOWN -1
+#define CONTROL_ERROR -2
+#define CONTROL_NA -3
+
+#endif /* MPLAYER_MPC_INFO_H */
diff --git a/libavfilter/libmpcodecs/vf.h b/libavfilter/libmpcodecs/vf.h
new file mode 100644
index 0000000..d8fc66b
--- /dev/null
+++ b/libavfilter/libmpcodecs/vf.h
@@ -0,0 +1,169 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_VF_H
+#define MPLAYER_VF_H
+
+//#include "m_option.h"
+#include "mp_image.h"
+
+//extern m_obj_settings_t* vf_settings;
+//extern const m_obj_list_t vf_obj_list;
+
+struct vf_instance;
+struct vf_priv_s;
+
+/* Static description of one video filter plus its open function. */
+typedef struct vf_info_s {
+ const char *info;
+ const char *name;
+ const char *author;
+ const char *comment;
+ int (*vf_open)(struct vf_instance *vf,char* args);
+ // Ptr to a struct describing the options
+ const void* opts;
+} vf_info_t;
+
+// number of entries in vf_image_context_t.numbered_images
+#define NUM_NUMBERED_MPI 50
+
+/* Image pointers kept per filter instance, grouped by kind (the array names
+ * mirror the MP_IMGTYPE_* buffer types -- presumably one array per type;
+ * confirm against the allocator). */
+typedef struct vf_image_context_s {
+ mp_image_t* static_images[2];
+ mp_image_t* temp_images[1];
+ mp_image_t* export_images[1];
+ mp_image_t* numbered_images[NUM_NUMBERED_MPI];
+ int static_idx;
+} vf_image_context_t;
+
+/* Configuration state of a filter: a "have configured" flag and the original
+ * width, height and format. */
+typedef struct vf_format_context_t {
+ int have_configured;
+ int orig_width, orig_height, orig_fmt;
+} vf_format_context_t;
+
+/* One filter instance in a chain: its callbacks, capabilities and data.
+ * Instances are linked through `next`. */
+typedef struct vf_instance {
+ const vf_info_t* info;
+ // funcs:
+ int (*config)(struct vf_instance *vf,
+ int width, int height, int d_width, int d_height,
+ unsigned int flags, unsigned int outfmt);
+ int (*control)(struct vf_instance *vf,
+ int request, void* data);
+ int (*query_format)(struct vf_instance *vf,
+ unsigned int fmt);
+ void (*get_image)(struct vf_instance *vf,
+ mp_image_t *mpi);
+ int (*put_image)(struct vf_instance *vf,
+ mp_image_t *mpi, double pts);
+ void (*start_slice)(struct vf_instance *vf,
+ mp_image_t *mpi);
+ void (*draw_slice)(struct vf_instance *vf,
+ unsigned char** src, int* stride, int w,int h, int x, int y);
+ void (*uninit)(struct vf_instance *vf);
+
+ int (*continue_buffered_image)(struct vf_instance *vf);
+ // caps:
+ unsigned int default_caps; // used by default query_format()
+ unsigned int default_reqs; // used by default config()
+ // data:
+ int w, h;
+ vf_image_context_t imgctx;
+ vf_format_context_t fmt;
+ struct vf_instance *next;
+ mp_image_t *dmpi;
+ struct vf_priv_s* priv; // filter private data; struct vf_priv_s is defined per filter
+} vf_instance_t;
+
+// control codes:
+#include "mpc_info.h"
+
+/* Name/value pair (presumably the data of the VFCTRL_SET_EQUALIZER and
+ * VFCTRL_GET_EQUALIZER requests -- confirm). */
+typedef struct vf_seteq_s
+{
+ const char *item;
+ int value;
+} vf_equalizer_t;
+
+/* VFCTRL_* request codes. Note that 7 (DRAW_OSD) is listed after 8. */
+#define VFCTRL_QUERY_MAX_PP_LEVEL 4 /* test for postprocessing support (max level) */
+#define VFCTRL_SET_PP_LEVEL 5 /* set postprocessing level */
+#define VFCTRL_SET_EQUALIZER 6 /* set color options (brightness,contrast etc) */
+#define VFCTRL_GET_EQUALIZER 8 /* get color options (brightness,contrast etc) */
+#define VFCTRL_DRAW_OSD 7
+#define VFCTRL_CHANGE_RECTANGLE 9 /* Change the rectangle boundaries */
+#define VFCTRL_FLIP_PAGE 10 /* Tell the vo to flip pages */
+#define VFCTRL_DUPLICATE_FRAME 11 /* For encoding - encode zero-change frame */
+#define VFCTRL_SKIP_NEXT_FRAME 12 /* For encoding - drop the next frame that passes through */
+#define VFCTRL_FLUSH_FRAMES 13 /* For encoding - flush delayed frames */
+#define VFCTRL_SCREENSHOT 14 /* Make a screenshot */
+#define VFCTRL_INIT_EOSD 15 /* Select EOSD renderer */
+#define VFCTRL_DRAW_EOSD 16 /* Render EOSD */
+#define VFCTRL_GET_PTS 17 /* Return last pts value that reached vf_vo*/
+#define VFCTRL_SET_DEINTERLACE 18 /* Set deinterlacing status */
+#define VFCTRL_GET_DEINTERLACE 19 /* Get deinterlacing status */
+
+#include "vfcap.h"
+
+//FIXME this should be in a common header, but i dunno which
+// "No timestamp" marker: -2^63 with type long long. It is spelled without
+// left-shifting a negative value, which is undefined behaviour in C.
+#define MP_NOPTS_VALUE (-0x7FFFFFFFFFFFFFFFLL - 1) //both int64_t and double should be able to represent this exactly
+
+
+// functions:
+// (prototypes only; the implementations are not part of this header)
+void ff_vf_mpi_clear(mp_image_t* mpi,int x0,int y0,int w,int h);
+mp_image_t* ff_vf_get_image(vf_instance_t* vf, unsigned int outfmt, int mp_imgtype, int mp_imgflag, int w, int h);
+
+vf_instance_t* vf_open_plugin(const vf_info_t* const* filter_list, vf_instance_t* next, const char *name, char **args);
+vf_instance_t* vf_open_filter(vf_instance_t* next, const char *name, char **args);
+vf_instance_t* ff_vf_add_before_vo(vf_instance_t **vf, char *name, char **args);
+vf_instance_t* vf_open_encoder(vf_instance_t* next, const char *name, char *args);
+
+unsigned int ff_vf_match_csp(vf_instance_t** vfp,const unsigned int* list,unsigned int preferred);
+void ff_vf_clone_mpi_attributes(mp_image_t* dst, mp_image_t* src);
+void ff_vf_queue_frame(vf_instance_t *vf, int (*)(vf_instance_t *));
+int ff_vf_output_queued_frame(vf_instance_t *vf);
+
+// default wrappers:
+// (signatures match the corresponding callbacks in vf_instance_t)
+int ff_vf_next_config(struct vf_instance *vf,
+ int width, int height, int d_width, int d_height,
+ unsigned int flags, unsigned int outfmt);
+int ff_vf_next_control(struct vf_instance *vf, int request, void* data);
+void ff_vf_extra_flip(struct vf_instance *vf);
+int ff_vf_next_query_format(struct vf_instance *vf, unsigned int fmt);
+int ff_vf_next_put_image(struct vf_instance *vf,mp_image_t *mpi, double pts);
+void ff_vf_next_draw_slice (struct vf_instance *vf, unsigned char** src, int* stride, int w,int h, int x, int y);
+
+vf_instance_t* ff_append_filters(vf_instance_t* last);
+
+void ff_vf_uninit_filter(vf_instance_t* vf);
+void ff_vf_uninit_filter_chain(vf_instance_t* vf);
+
+int ff_vf_config_wrapper(struct vf_instance *vf,
+ int width, int height, int d_width, int d_height,
+ unsigned int flags, unsigned int outfmt);
+
+/**
+ * Normalize a quantizer value according to its type.
+ *
+ * type 1 (MPEG-2) halves the value, type 2 (H264) quarters it, type 3 (VP56)
+ * returns (63 - qscale + 2) >> 2. Type 0 (MPEG-1) and every other type
+ * return qscale unchanged.
+ */
+static inline int norm_qscale(int qscale, int type)
+{
+    if (type == 1) // MPEG-2
+        return qscale >> 1;
+    if (type == 2) // H264
+        return qscale >> 2;
+    if (type == 3) // VP56
+        return (63 - qscale + 2) >> 2;
+
+    // MPEG-1 (type 0) and anything unknown
+    return qscale;
+}
+
+#endif /* MPLAYER_VF_H */
diff --git a/libavfilter/libmpcodecs/vf_eq.c b/libavfilter/libmpcodecs/vf_eq.c
new file mode 100644
index 0000000..f8efa84
--- /dev/null
+++ b/libavfilter/libmpcodecs/vf_eq.c
@@ -0,0 +1,240 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "config.h"
+#include "mp_msg.h"
+#include "cpudetect.h"
+
+#include "img_format.h"
+#include "mp_image.h"
+#include "vf.h"
+
+#include "libvo/video_out.h"
+
+/* Per-instance state of the "eq" filter. */
+struct vf_priv_s {
+ unsigned char *buf; // scratch plane 0, allocated on first use in put_image() and freed in uninit()
+ int brightness; // brightness setting; process_*() offsets it by +100, so presumably -100..100 (TODO confirm)
+ int contrast; // contrast setting; when both settings are 0 put_image() passes plane 0 through
+};
+
+#if HAVE_MMX && HAVE_6REGS
+/**
+ * MMX implementation of the brightness/contrast adjustment on one 8-bit plane.
+ *
+ * Eight pixels are handled per asm iteration: bytes are widened to words,
+ * shifted left by 4, multiplied by the contrast factor (pmulhw keeps the high
+ * 16 bits), offset by the brightness term and packed back with unsigned
+ * saturation. The remaining (w & 7) pixels of each row use the scalar formula.
+ *
+ * @param dest       destination plane
+ * @param dstride    bytes between the starts of two destination rows
+ * @param src        source plane
+ * @param sstride    bytes between the starts of two source rows
+ * @param w          row width in pixels
+ * @param h          number of rows
+ * @param brightness user brightness setting
+ * @param contrast   user contrast setting
+ */
+static void process_MMX(unsigned char *dest, int dstride, unsigned char *src, int sstride,
+                        int w, int h, int brightness, int contrast)
+{
+    int i;
+    int pel;
+    int dstep = dstride-w;
+    int sstep = sstride-w;
+    short brvec[4];
+    short contvec[4];
+
+    /* 12-bit fixed-point contrast; brightness is pre-compensated for it. */
+    contrast = ((contrast+100)*256*16)/100;
+    brightness = ((brightness+100)*511)/200-128 - contrast/32;
+
+    brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness;
+    contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast;
+
+    while (h--) {
+        /*
+         * The asm loop tests its counter only after the first iteration
+         * (decl/jnz). Entering it with w>>3 == 0 would wrap the counter and
+         * run far past both buffers, so rows narrower than 8 pixels skip it
+         * and are handled entirely by the scalar tail below.
+         */
+        if (w >> 3) {
+            __asm__ volatile (
+                "movq (%5), %%mm3 \n\t"
+                "movq (%6), %%mm4 \n\t"
+                "pxor %%mm0, %%mm0 \n\t"
+                "movl %4, %%eax\n\t"
+                ASMALIGN(4)
+                "1: \n\t"
+                "movq (%0), %%mm1 \n\t"
+                "movq (%0), %%mm2 \n\t"
+                "punpcklbw %%mm0, %%mm1 \n\t"
+                "punpckhbw %%mm0, %%mm2 \n\t"
+                "psllw $4, %%mm1 \n\t"
+                "psllw $4, %%mm2 \n\t"
+                "pmulhw %%mm4, %%mm1 \n\t"
+                "pmulhw %%mm4, %%mm2 \n\t"
+                "paddw %%mm3, %%mm1 \n\t"
+                "paddw %%mm3, %%mm2 \n\t"
+                "packuswb %%mm2, %%mm1 \n\t"
+                "add $8, %0 \n\t"
+                "movq %%mm1, (%1) \n\t"
+                "add $8, %1 \n\t"
+                "decl %%eax \n\t"
+                "jnz 1b \n\t"
+                : "=r" (src), "=r" (dest)
+                : "0" (src), "1" (dest), "r" (w>>3), "r" (brvec), "r" (contvec)
+                : "%eax"
+            );
+        }
+
+        /* Scalar tail: same formula as the asm, one pixel at a time. */
+        for (i = w&7; i; i--)
+        {
+            pel = ((*src++* contrast)>>12) + brightness;
+            /* Bit 8 or 9 set means out of 0..255: negative -> 0, too big -> 255. */
+            if(pel&768) pel = (-pel)>>31;
+            *dest++ = pel;
+        }
+
+        src += sstep;
+        dest += dstep;
+    }
+    __asm__ volatile ( "emms \n\t" ::: "memory" );
+}
+#endif
+
+/**
+ * Portable brightness/contrast adjustment of one 8-bit plane.
+ *
+ * Each pixel becomes ((pixel * contrast) >> 16) + brightness, clamped to
+ * 0..255, with contrast and brightness first converted to fixed point.
+ *
+ * @param dest       destination plane
+ * @param dstride    bytes between the starts of two destination rows
+ * @param src        source plane
+ * @param sstride    bytes between the starts of two source rows
+ * @param w          row width in pixels
+ * @param h          number of rows
+ * @param brightness user brightness setting
+ * @param contrast   user contrast setting
+ */
+static void process_C(unsigned char *dest, int dstride, unsigned char *src, int sstride,
+                      int w, int h, int brightness, int contrast)
+{
+    const int gain   = ((contrast+100)*256*256)/100;
+    const int offset = ((brightness+100)*511)/200-128 - gain/512;
+    int x;
+
+    while (h--) {
+        for (x = 0; x < w; x++) {
+            int value = ((src[x] * gain) >> 16) + offset;
+
+            /* Bit 8 or 9 set means out of range: negative -> 0, too big -> 255. */
+            if (value & 768)
+                value = (-value) >> 31;
+            dest[x] = value;
+        }
+        src  += sstride;
+        dest += dstride;
+    }
+}
+
+/* Plane-processing routine in use; vf_open() points it at process_C or process_MMX. */
+static void (*process)(unsigned char *dest, int dstride, unsigned char *src, int sstride,
+ int w, int h, int brightness, int contrast);
+
+/* FIXME: add packed yuv version of process */
+
+/**
+ * Filter one frame: planes 1 and 2 are exported untouched, plane 0 is either
+ * passed through (brightness == contrast == 0) or processed into the private
+ * scratch buffer.
+ *
+ * @param vf  filter instance
+ * @param mpi incoming frame
+ * @param pts presentation timestamp, forwarded unchanged
+ * @return result of ff_vf_next_put_image(), or 0 when no output image or
+ *         scratch buffer could be obtained
+ */
+static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
+{
+    mp_image_t *dmpi;
+
+    dmpi = ff_vf_get_image(vf->next, mpi->imgfmt,
+                           MP_IMGTYPE_EXPORT, 0,
+                           mpi->w, mpi->h);
+    /* NOTE(review): assumes ff_vf_get_image() reports failure as NULL - confirm. */
+    if (!dmpi)
+        return 0;
+
+    dmpi->stride[0] = mpi->stride[0];
+    dmpi->planes[1] = mpi->planes[1];
+    dmpi->planes[2] = mpi->planes[2];
+    dmpi->stride[1] = mpi->stride[1];
+    dmpi->stride[2] = mpi->stride[2];
+
+    if ((vf->priv->brightness == 0) && (vf->priv->contrast == 0)) {
+        /* Nothing to adjust: export the source plane 0 as is. */
+        dmpi->planes[0] = mpi->planes[0];
+    } else {
+        /* The scratch plane is only needed once an adjustment is active. */
+        if (!vf->priv->buf) {
+            vf->priv->buf = malloc(mpi->stride[0]*mpi->h);
+            if (!vf->priv->buf)
+                return 0;
+        }
+        /* NOTE(review): buf keeps the size of the first processed frame and
+         * is never resized if stride or height later change. */
+        dmpi->planes[0] = vf->priv->buf;
+        process(dmpi->planes[0], dmpi->stride[0],
+                mpi->planes[0], mpi->stride[0],
+                mpi->w, mpi->h, vf->priv->brightness,
+                vf->priv->contrast);
+    }
+
+    return ff_vf_next_put_image(vf,dmpi, pts);
+}
+
+/**
+ * Handle equalizer get/set requests for "brightness" and "contrast"; every
+ * other request or item name is forwarded to the next filter.
+ *
+ * @param vf      filter instance
+ * @param request VFCTRL_* request code
+ * @param data    request payload; a vf_equalizer_t for equalizer requests
+ * @return CONTROL_TRUE when handled here, otherwise the result of
+ *         ff_vf_next_control()
+ */
+static int control(struct vf_instance *vf, int request, void* data)
+{
+    if (request == VFCTRL_SET_EQUALIZER || request == VFCTRL_GET_EQUALIZER) {
+        vf_equalizer_t *eq = data;
+        int *setting = NULL;
+
+        /* Map the item name onto the private field it controls. */
+        if (!strcmp(eq->item,"brightness"))
+            setting = &vf->priv->brightness;
+        else if (!strcmp(eq->item,"contrast"))
+            setting = &vf->priv->contrast;
+
+        if (setting) {
+            if (request == VFCTRL_SET_EQUALIZER)
+                *setting = eq->value;
+            else
+                eq->value = *setting;
+            return CONTROL_TRUE;
+        }
+    }
+
+    return ff_vf_next_control(vf, request, data);
+}
+
+/**
+ * Report whether an image format is supported: listed formats are passed on
+ * to the next filter's query, everything else is rejected.
+ *
+ * @param vf  filter instance
+ * @param fmt IMGFMT_* code being queried
+ * @return result of ff_vf_next_query_format() for a listed format, else 0
+ */
+static int query_format(struct vf_instance *vf, unsigned int fmt)
+{
+    static const unsigned int accepted[] = {
+        IMGFMT_YVU9, IMGFMT_IF09, IMGFMT_YV12, IMGFMT_I420, IMGFMT_IYUV,
+        IMGFMT_CLPL, IMGFMT_Y800, IMGFMT_Y8,   IMGFMT_NV12, IMGFMT_NV21,
+        IMGFMT_444P, IMGFMT_422P, IMGFMT_411P,
+    };
+    unsigned int k;
+
+    for (k = 0; k < sizeof(accepted) / sizeof(accepted[0]); k++) {
+        if (fmt == accepted[k])
+            return ff_vf_next_query_format(vf, fmt);
+    }
+    return 0;
+}
+
+/**
+ * Release the scratch plane and the private state of this filter instance.
+ *
+ * @param vf filter instance being torn down
+ */
+static void uninit(struct vf_instance *vf)
+{
+    struct vf_priv_s *state = vf->priv;
+
+    free(state->buf);
+    free(state);
+}
+
+/**
+ * Set up an "eq" filter instance.
+ *
+ * @param vf   instance to initialise
+ * @param args optional "brightness:contrast" string; missing values stay 0
+ * @return 1 on success, 0 when the private state cannot be allocated
+ */
+static int vf_open(vf_instance_t *vf, char *args)
+{
+    /* Allocate before installing callbacks so a failure leaves vf untouched.
+     * calloc() also provides the zeroed defaults (buf = NULL, settings = 0). */
+    vf->priv = calloc(1, sizeof(struct vf_priv_s));
+    if (!vf->priv)
+        return 0;
+
+    vf->control=control;
+    vf->query_format=query_format;
+    vf->put_image=put_image;
+    vf->uninit=uninit;
+
+    if (args) sscanf(args, "%d:%d", &vf->priv->brightness, &vf->priv->contrast);
+
+    /* Use the MMX routine when it was compiled in and the CPU reports MMX. */
+    process = process_C;
+#if HAVE_MMX && HAVE_6REGS
+    if(ff_gCpuCaps.hasMMX) process = process_MMX;
+#endif
+
+    return 1;
+}
+
+/*
+ * Filter descriptor for "eq".
+ * NOTE(review): positional initializer; the fields look like description,
+ * short name, author, comment and open callback - confirm against vf_info_t.
+ */
+const vf_info_t ff_vf_info_eq = {
+ "soft video equalizer",
+ "eq",
+ "Richard Felker",
+ "",
+ vf_open,
+};
diff --git a/libavfilter/libmpcodecs/vf_eq2.c b/libavfilter/libmpcodecs/vf_eq2.c
new file mode 100644
index 0000000..0356813
--- /dev/null
+++ b/libavfilter/libmpcodecs/vf_eq2.c
@@ -0,0 +1,519 @@
+/*
+ * Software equalizer (brightness, contrast, gamma, saturation)
+ *
+ * Hampa Hug <hampa@hampa.ch> (original LUT gamma/contrast/brightness filter)
+ * Daniel Moreno <comac@comac.darktech.org> (saturation, R/G/B gamma support)
+ * Richard Felker (original MMX contrast/brightness code (vf_eq.c))
+ * Michael Niedermayer <michalni@gmx.at> (LUT16)
+ *
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <inttypes.h>
+
+#include "config.h"
+#include "mp_msg.h"
+#include "cpudetect.h"
+
+#include "img_format.h"
+#include "mp_image.h"
+#include "vf.h"
+
+#define LUT16
+
+/* Per channel parameters */
+/* Per channel parameters */
+typedef struct eq2_param_t {
+ unsigned char lut[256]; // 8-bit transfer table filled by create_lut()
+#ifdef LUT16
+ uint16_t lut16[256*256]; // two-pixel table derived from lut[] (low byte and high byte mapped separately)
+#endif
+ int lut_clean; // non-zero once create_lut() has rebuilt the tables; setters reset it to 0
+
+ // plane processing routine chosen by check_values(); NULL means the plane is passed through
+ void (*adjust) (struct eq2_param_t *par, unsigned char *dst, unsigned char *src,
+ unsigned w, unsigned h, unsigned dstride, unsigned sstride);
+
+ double c; // contrast factor (1.0 = neutral)
+ double b; // brightness offset (0.0 = neutral)
+ double g; // gamma (1.0 = neutral)
+ double w; // gamma weight: blend between the linear value (0.0) and the gamma-corrected value (1.0)
+} eq2_param_t;
+
+/* Per-instance state of the "eq2" filter. */
+typedef struct vf_priv_s {
+ eq2_param_t param[3]; // one parameter set per plane (index = plane number)
+
+ // user-level settings; the setters translate them into param[]
+ double contrast;
+ double brightness;
+ double saturation;
+
+ double gamma;
+ double gamma_weight;
+ double rgamma;
+ double ggamma;
+ double bgamma;
+
+ unsigned buf_w[3]; // per-plane width of the scratch planes
+ unsigned buf_h[3]; // per-plane height of the scratch planes
+ unsigned char *buf[3]; // scratch planes; buf[0] owns the allocation, buf[1] and buf[2] point into it
+} vf_eq2_t;
+
+
+/**
+ * Rebuild the lookup tables of one channel from its c/b/g/w parameters and
+ * mark them clean.
+ *
+ * A gamma outside [0.001, 1000] is treated as 1.0. Each input level is
+ * normalised to 0..1, contrast and brightness are applied, and the result is
+ * blended with its gamma-corrected value according to the gamma weight.
+ *
+ * @param par channel parameters; lut, lut16 (if LUT16) and lut_clean are written
+ */
+static
+void create_lut (eq2_param_t *par)
+{
+    const double gw = par->w;
+    const double lw = 1.0 - gw;
+    double g = par->g;
+    double v;
+    unsigned level;
+
+    if ((g < 0.001) || (g > 1000.0)) {
+        g = 1.0;
+    }
+    g = 1.0 / g;
+
+    for (level = 0; level < 256; level++) {
+        v = (double) level / 255.0;
+        v = par->c * (v - 0.5) + 0.5 + par->b;
+
+        if (v <= 0.0) {
+            par->lut[level] = 0;
+            continue;
+        }
+
+        v = v*lw + pow(v, g)*gw;
+        par->lut[level] = (v >= 1.0) ? 255 : (unsigned char) (256.0 * v);
+    }
+
+#ifdef LUT16
+    /* Entry (hi << 8 | lo) maps both bytes of a pixel pair in one lookup. */
+    {
+        unsigned hi, lo;
+
+        for (hi = 0; hi < 256; hi++) {
+            for (lo = 0; lo < 256; lo++) {
+                par->lut16[(hi << 8) | lo] = par->lut[lo] + (par->lut[hi] << 8);
+            }
+        }
+    }
+#endif
+
+    par->lut_clean = 1;
+}
+
+#if HAVE_MMX && HAVE_6REGS
+/**
+ * MMX contrast/brightness adjustment of one plane (no gamma).
+ *
+ * Eight pixels are handled per asm iteration: bytes are widened to words,
+ * shifted left by 4, multiplied by the contrast factor (pmulhw keeps the high
+ * 16 bits), offset by the brightness term and packed back with unsigned
+ * saturation. The remaining (w & 7) pixels of each row use the scalar formula.
+ *
+ * @param par     channel parameters; c and b are read
+ * @param dst     destination plane
+ * @param src     source plane
+ * @param w       row width in pixels
+ * @param h       number of rows
+ * @param dstride bytes between the starts of two destination rows
+ * @param sstride bytes between the starts of two source rows
+ */
+static
+void affine_1d_MMX (eq2_param_t *par, unsigned char *dst, unsigned char *src,
+                    unsigned w, unsigned h, unsigned dstride, unsigned sstride)
+{
+    unsigned i;
+    int contrast, brightness;
+    unsigned dstep, sstep;
+    int pel;
+    short brvec[4];
+    short contvec[4];
+
+//  printf("\nmmx: src=%p dst=%p w=%d h=%d ds=%d ss=%d\n",src,dst,w,h,dstride,sstride);
+
+    /* 12-bit fixed-point contrast; brightness is pre-compensated for it. */
+    contrast = (int) (par->c * 256 * 16);
+    brightness = ((int) (100.0 * par->b + 100.0) * 511) / 200 - 128 - contrast / 32;
+
+    brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness;
+    contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast;
+
+    sstep = sstride - w;
+    dstep = dstride - w;
+
+    while (h-- > 0) {
+        /*
+         * The asm loop tests its counter only after the first iteration
+         * (decl/jnz). Entering it with w>>3 == 0 would wrap the counter and
+         * run far past both buffers, so rows narrower than 8 pixels skip it
+         * and are handled entirely by the scalar tail below.
+         */
+        if (w >> 3) {
+            __asm__ volatile (
+                "movq (%5), %%mm3 \n\t"
+                "movq (%6), %%mm4 \n\t"
+                "pxor %%mm0, %%mm0 \n\t"
+                "movl %4, %%eax\n\t"
+                ASMALIGN(4)
+                "1: \n\t"
+                "movq (%0), %%mm1 \n\t"
+                "movq (%0), %%mm2 \n\t"
+                "punpcklbw %%mm0, %%mm1 \n\t"
+                "punpckhbw %%mm0, %%mm2 \n\t"
+                "psllw $4, %%mm1 \n\t"
+                "psllw $4, %%mm2 \n\t"
+                "pmulhw %%mm4, %%mm1 \n\t"
+                "pmulhw %%mm4, %%mm2 \n\t"
+                "paddw %%mm3, %%mm1 \n\t"
+                "paddw %%mm3, %%mm2 \n\t"
+                "packuswb %%mm2, %%mm1 \n\t"
+                "add $8, %0 \n\t"
+                "movq %%mm1, (%1) \n\t"
+                "add $8, %1 \n\t"
+                "decl %%eax \n\t"
+                "jnz 1b \n\t"
+                : "=r" (src), "=r" (dst)
+                : "0" (src), "1" (dst), "r" (w >> 3), "r" (brvec), "r" (contvec)
+                : "%eax"
+            );
+        }
+
+        /* Scalar tail: same formula as the asm, one pixel at a time. */
+        for (i = w & 7; i > 0; i--) {
+            pel = ((*src++ * contrast) >> 12) + brightness;
+            /* Bit 8 or 9 set means out of 0..255: negative -> 0, too big -> 255. */
+            if (pel & 768) {
+                pel = (-pel) >> 31;
+            }
+            *dst++ = pel;
+        }
+
+        src += sstep;
+        dst += dstep;
+    }
+
+    __asm__ volatile ( "emms \n\t" ::: "memory" );
+}
+#endif
+
+/**
+ * Map every pixel of a plane through the channel's lookup table, rebuilding
+ * the table first if it is marked dirty.
+ *
+ * With LUT16 the bulk of each row is processed two pixels at a time through
+ * lut16; otherwise eight single lookups are unrolled. Either way the pixels
+ * past the last multiple of eight go through the 8-bit table one by one.
+ *
+ * @param par     channel parameters holding the tables
+ * @param dst     destination plane
+ * @param src     source plane
+ * @param w       row width in pixels
+ * @param h       number of rows
+ * @param dstride bytes between the starts of two destination rows
+ * @param sstride bytes between the starts of two source rows
+ */
+static
+void apply_lut (eq2_param_t *par, unsigned char *dst, unsigned char *src,
+ unsigned w, unsigned h, unsigned dstride, unsigned sstride)
+{
+ unsigned i, j, w2;
+ unsigned char *lut;
+ uint16_t *lut16;
+
+ if (!par->lut_clean) {
+ create_lut (par);
+ }
+
+ lut = par->lut;
+#ifdef LUT16
+ lut16 = par->lut16;
+ // w2 counts 16-bit units: (w rounded down to a multiple of 8) / 2
+ w2= (w>>3)<<2;
+ for (j = 0; j < h; j++) {
+ // NOTE(review): rows are accessed as uint16_t; presumably callers guarantee suitable alignment - confirm
+ uint16_t *src16= (uint16_t*)src;
+ uint16_t *dst16= (uint16_t*)dst;
+ for (i = 0; i < w2; i+=4) {
+ dst16[i+0] = lut16[src16[i+0]];
+ dst16[i+1] = lut16[src16[i+1]];
+ dst16[i+2] = lut16[src16[i+2]];
+ dst16[i+3] = lut16[src16[i+3]];
+ }
+ // convert the 16-bit index back to a byte index for the tail loop below
+ i <<= 1;
+#else
+ w2= (w>>3)<<3;
+ for (j = 0; j < h; j++) {
+ for (i = 0; i < w2; i+=8) {
+ dst[i+0] = lut[src[i+0]];
+ dst[i+1] = lut[src[i+1]];
+ dst[i+2] = lut[src[i+2]];
+ dst[i+3] = lut[src[i+3]];
+ dst[i+4] = lut[src[i+4]];
+ dst[i+5] = lut[src[i+5]];
+ dst[i+6] = lut[src[i+6]];
+ dst[i+7] = lut[src[i+7]];
+ }
+#endif
+ // remaining (w & 7) pixels; this loop and the closing brace are shared by both variants
+ for (; i < w; i++) {
+ dst[i] = lut[src[i]];
+ }
+
+ src += sstride;
+ dst += dstride;
+ }
+}
+
+/**
+ * Filter one frame: every plane with an active adjust routine is processed
+ * into the private scratch buffer, the others are exported untouched.
+ *
+ * The scratch buffer is (re)allocated whenever the luma size changes.
+ *
+ * @param vf  filter instance
+ * @param src incoming frame
+ * @param pts presentation timestamp, forwarded unchanged
+ * @return result of ff_vf_next_put_image(), or 0 when the scratch buffer or
+ *         the output image could not be obtained
+ */
+static
+int put_image (vf_instance_t *vf, mp_image_t *src, double pts)
+{
+    unsigned i;
+    vf_eq2_t *eq2;
+    mp_image_t *dst;
+
+    eq2 = vf->priv;
+
+    if ((eq2->buf_w[0] != src->w) || (eq2->buf_h[0] != src->h)) {
+        const unsigned luma_w   = src->w;
+        const unsigned luma_h   = src->h;
+        const unsigned chroma_w = src->w >> src->chroma_x_shift;
+        const unsigned chroma_h = src->h >> src->chroma_y_shift;
+        const int has_chroma    = src->num_planes > 1;
+        const size_t img_n = (size_t) luma_w * luma_h;
+        const size_t img_c = has_chroma ? (size_t) chroma_w * chroma_h : 0;
+        const size_t total = img_n + 2 * img_c;
+        unsigned char *grown;
+
+        /* Keep the old pointer until realloc() succeeds so a failure neither
+         * leaks it nor leaves buf[0] NULL; never request zero bytes. */
+        grown = realloc (eq2->buf[0], total ? total : 1);
+        if (!grown) {
+            /* Recorded size is unchanged, so the next frame retries. */
+            return 0;
+        }
+
+        eq2->buf_w[0] = luma_w;
+        eq2->buf_h[0] = luma_h;
+        eq2->buf_w[1] = eq2->buf_w[2] = chroma_w;
+        eq2->buf_h[1] = eq2->buf_h[2] = chroma_h;
+        eq2->buf[0] = grown;
+        if (has_chroma) {
+            eq2->buf[1] = eq2->buf[0] + img_n;
+            eq2->buf[2] = eq2->buf[1] + img_c;
+        }
+    }
+
+    dst = ff_vf_get_image (vf->next, src->imgfmt, MP_IMGTYPE_EXPORT, 0, src->w, src->h);
+    /* NOTE(review): assumes ff_vf_get_image() reports failure as NULL - confirm. */
+    if (!dst) {
+        return 0;
+    }
+
+    for (i = 0; i < ((src->num_planes>1)?3:1); i++) {
+        if (eq2->param[i].adjust) {
+            /* Scratch planes are tightly packed: stride equals width. */
+            dst->planes[i] = eq2->buf[i];
+            dst->stride[i] = eq2->buf_w[i];
+
+            eq2->param[i].adjust (&eq2->param[i], dst->planes[i], src->planes[i],
+                eq2->buf_w[i], eq2->buf_h[i], dst->stride[i], src->stride[i]);
+        }
+        else {
+            dst->planes[i] = src->planes[i];
+            dst->stride[i] = src->stride[i];
+        }
+    }
+
+    return ff_vf_next_put_image (vf, dst, pts);
+}
+
+/**
+ * Choose the processing routine for one channel from its parameters:
+ * none when all values are neutral, the MMX affine routine when only
+ * contrast/brightness differ and MMX is usable, the lookup table otherwise.
+ *
+ * @param par channel parameters; only par->adjust is written
+ */
+static
+void check_values (eq2_param_t *par)
+{
+    /* Exact floating point comparisons against the neutral values. */
+    if ((par->c == 1.0) && (par->b == 0.0) && (par->g == 1.0)) {
+        par->adjust = NULL;
+        return;
+    }
+
+#if HAVE_MMX && HAVE_6REGS
+    if (par->g == 1.0 && ff_gCpuCaps.hasMMX) {
+        par->adjust = &affine_1d_MMX;
+        return;
+    }
+#endif
+
+    par->adjust = &apply_lut;
+}
+
+/**
+ * Log the current contrast, brightness, gamma and saturation at verbose level.
+ *
+ * @param eq2 filter state to report
+ */
+static
+void print_values (vf_eq2_t *eq2)
+{
+    const double c = eq2->contrast;
+    const double b = eq2->brightness;
+    const double g = eq2->gamma;
+    const double s = eq2->saturation;
+
+    ff_mp_msg (MSGT_VFILTER, MSGL_V, "vf_eq2: c=%.2f b=%.2f g=%.4f s=%.2f \n", c, b, g, s);
+}
+
+/**
+ * Set the contrast; it only affects plane 0.
+ *
+ * @param eq2 filter state
+ * @param c   new contrast factor
+ */
+static
+void set_contrast (vf_eq2_t *eq2, double c)
+{
+    eq2_param_t *luma = &eq2->param[0];
+
+    eq2->contrast = c;
+    luma->c = c;
+    luma->lut_clean = 0;    /* table must be rebuilt before its next use */
+
+    check_values (luma);
+    print_values (eq2);
+}
+
+/**
+ * Set the brightness; it only affects plane 0.
+ *
+ * @param eq2 filter state
+ * @param b   new brightness offset
+ */
+static
+void set_brightness (vf_eq2_t *eq2, double b)
+{
+    eq2_param_t *luma = &eq2->param[0];
+
+    eq2->brightness = b;
+    luma->b = b;
+    luma->lut_clean = 0;    /* table must be rebuilt before its next use */
+
+    check_values (luma);
+    print_values (eq2);
+}
+
+/**
+ * Set the overall gamma and derive the per-plane gamma values from it and
+ * from the stored rgamma/ggamma/bgamma; the gamma weight is copied to all
+ * three planes.
+ *
+ * @param eq2 filter state
+ * @param g   new overall gamma
+ */
+static
+void set_gamma (vf_eq2_t *eq2, double g)
+{
+    unsigned plane;
+
+    eq2->gamma = g;
+
+    eq2->param[0].g = eq2->gamma * eq2->ggamma;
+    eq2->param[1].g = sqrt (eq2->bgamma / eq2->ggamma);
+    eq2->param[2].g = sqrt (eq2->rgamma / eq2->ggamma);
+
+    for (plane = 0; plane < 3; plane++) {
+        eq2_param_t *par = &eq2->param[plane];
+
+        par->w = eq2->gamma_weight;
+        par->lut_clean = 0;
+        check_values (par);
+    }
+
+    print_values (eq2);
+}
+
+/**
+ * Set the saturation; it is applied as the contrast of planes 1 and 2.
+ *
+ * @param eq2 filter state
+ * @param s   new saturation factor
+ */
+static
+void set_saturation (vf_eq2_t *eq2, double s)
+{
+    unsigned plane;
+
+    eq2->saturation = s;
+
+    for (plane = 1; plane <= 2; plane++) {
+        eq2_param_t *chroma = &eq2->param[plane];
+
+        chroma->c = s;
+        chroma->lut_clean = 0;
+        check_values (chroma);
+    }
+
+    print_values (eq2);
+}
+
+/**
+ * Handle equalizer get/set requests for "gamma", "contrast", "brightness"
+ * and "saturation", converting between the integer equalizer value and the
+ * filter's floating point settings. Anything else goes to the next filter.
+ *
+ * @param vf      filter instance
+ * @param request VFCTRL_* request code
+ * @param data    request payload; a vf_equalizer_t for equalizer requests
+ * @return CONTROL_TRUE when handled here, otherwise the result of
+ *         ff_vf_next_control()
+ */
+static
+int control (vf_instance_t *vf, int request, void *data)
+{
+    if (request == VFCTRL_SET_EQUALIZER) {
+        vf_equalizer_t *eq = (vf_equalizer_t *) data;
+
+        if (!strcmp (eq->item, "gamma")) {
+            set_gamma (vf->priv, exp (log (8.0) * eq->value / 100.0));
+            return CONTROL_TRUE;
+        }
+        if (!strcmp (eq->item, "contrast")) {
+            set_contrast (vf->priv, (1.0 / 100.0) * (eq->value + 100));
+            return CONTROL_TRUE;
+        }
+        if (!strcmp (eq->item, "brightness")) {
+            set_brightness (vf->priv, (1.0 / 100.0) * eq->value);
+            return CONTROL_TRUE;
+        }
+        if (!strcmp (eq->item, "saturation")) {
+            set_saturation (vf->priv, (double) (eq->value + 100) / 100.0);
+            return CONTROL_TRUE;
+        }
+    }
+    else if (request == VFCTRL_GET_EQUALIZER) {
+        vf_equalizer_t *eq = (vf_equalizer_t *) data;
+
+        if (!strcmp (eq->item, "gamma")) {
+            eq->value = (int) (100.0 * log (vf->priv->gamma) / log (8.0));
+            return CONTROL_TRUE;
+        }
+        if (!strcmp (eq->item, "contrast")) {
+            eq->value = (int) (100.0 * vf->priv->contrast) - 100;
+            return CONTROL_TRUE;
+        }
+        if (!strcmp (eq->item, "brightness")) {
+            eq->value = (int) (100.0 * vf->priv->brightness);
+            return CONTROL_TRUE;
+        }
+        if (!strcmp (eq->item, "saturation")) {
+            eq->value = (int) (100.0 * vf->priv->saturation) - 100;
+            return CONTROL_TRUE;
+        }
+    }
+
+    /* Unknown request or item name: let the rest of the chain handle it. */
+    return ff_vf_next_control (vf, request, data);
+}
+
+/**
+ * Report whether an image format is supported: listed formats are passed on
+ * to the next filter's query, everything else is rejected.
+ *
+ * @param vf  filter instance
+ * @param fmt IMGFMT_* code being queried
+ * @return result of ff_vf_next_query_format() for a listed format, else 0
+ */
+static
+int query_format (vf_instance_t *vf, unsigned fmt)
+{
+    static const unsigned accepted[] = {
+        IMGFMT_YVU9, IMGFMT_IF09, IMGFMT_YV12, IMGFMT_I420, IMGFMT_IYUV,
+        IMGFMT_Y800, IMGFMT_Y8,   IMGFMT_444P, IMGFMT_422P, IMGFMT_411P,
+    };
+    unsigned k;
+
+    for (k = 0; k < sizeof (accepted) / sizeof (accepted[0]); k++) {
+        if (fmt == accepted[k]) {
+            return ff_vf_next_query_format (vf, fmt);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Release the scratch buffer and the private state, if any was allocated.
+ *
+ * @param vf filter instance being torn down
+ */
+static
+void uninit (vf_instance_t *vf)
+{
+    vf_eq2_t *eq2 = vf->priv;
+
+    if (!eq2) {
+        return;
+    }
+
+    /* buf[1] and buf[2] point into the buf[0] allocation. */
+    free (eq2->buf[0]);
+    free (eq2);
+}
+
+/**
+ * Set up an "eq2" filter instance with neutral defaults and apply the
+ * optional argument string.
+ *
+ * @param vf   instance to initialise
+ * @param args optional colon-separated list of up to eight numbers, in order:
+ *             gamma, contrast, brightness, saturation, rgamma, ggamma,
+ *             bgamma, gamma weight; omitted trailing values keep defaults
+ * @return 1 on success, 0 when the private state cannot be allocated
+ */
+static
+int vf_open(vf_instance_t *vf, char *args)
+{
+    unsigned i;
+    vf_eq2_t *eq2;
+    double par[8];
+
+    /* Allocate before installing callbacks so a failure leaves vf untouched. */
+    eq2 = malloc (sizeof (vf_eq2_t));
+    if (!eq2) {
+        return 0;
+    }
+    vf->priv = eq2;
+
+    vf->control = control;
+    vf->query_format = query_format;
+    vf->put_image = put_image;
+    vf->uninit = uninit;
+
+    for (i = 0; i < 3; i++) {
+        eq2->buf[i] = NULL;
+        eq2->buf_w[i] = 0;
+        eq2->buf_h[i] = 0;
+
+        eq2->param[i].adjust = NULL;
+        eq2->param[i].c = 1.0;
+        eq2->param[i].b = 0.0;
+        eq2->param[i].g = 1.0;
+        /* The gamma weight must be defined even when no args are given:
+         * a later equalizer request can select apply_lut(), and
+         * create_lut() reads w. 1.0 matches the gamma_weight default. */
+        eq2->param[i].w = 1.0;
+        eq2->param[i].lut_clean = 0;
+    }
+
+    eq2->contrast = 1.0;
+    eq2->brightness = 0.0;
+    eq2->saturation = 1.0;
+
+    eq2->gamma = 1.0;
+    eq2->gamma_weight = 1.0;
+    eq2->rgamma = 1.0;
+    eq2->ggamma = 1.0;
+    eq2->bgamma = 1.0;
+
+    if (args) {
+        /* Defaults for values the argument string does not supply. */
+        par[0] = 1.0;
+        par[1] = 1.0;
+        par[2] = 0.0;
+        par[3] = 1.0;
+        par[4] = 1.0;
+        par[5] = 1.0;
+        par[6] = 1.0;
+        par[7] = 1.0;
+        sscanf (args, "%lf:%lf:%lf:%lf:%lf:%lf:%lf:%lf",
+            par, par + 1, par + 2, par + 3, par + 4, par + 5, par + 6, par + 7
+        );
+
+        /* The per-colour gammas and the weight must be in place before
+         * set_gamma() derives the per-plane values from them. */
+        eq2->rgamma = par[4];
+        eq2->ggamma = par[5];
+        eq2->bgamma = par[6];
+        eq2->gamma_weight = par[7];
+
+        set_gamma (eq2, par[0]);
+        set_contrast (eq2, par[1]);
+        set_brightness (eq2, par[2]);
+        set_saturation (eq2, par[3]);
+    }
+
+    return 1;
+}
+
+/*
+ * Filter descriptor for "eq2".
+ * NOTE(review): positional initializer; the fields look like description,
+ * short name, author, comment, open callback and one trailing pointer left
+ * NULL - confirm against vf_info_t.
+ */
+const vf_info_t ff_vf_info_eq2 = {
+ "Software equalizer",
+ "eq2",
+ "Hampa Hug, Daniel Moreno, Richard Felker",
+ "",
+ &vf_open,
+ NULL
+};
diff --git a/libavfilter/libmpcodecs/vf_fspp.c b/libavfilter/libmpcodecs/vf_fspp.c
new file mode 100644
index 0000000..c4a36ef
--- /dev/null
+++ b/libavfilter/libmpcodecs/vf_fspp.c
@@ -0,0 +1,2124 @@
+/*
+ * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
+ *
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * This implementation is based on an algorithm described in
+ * "Aria Nosratinia Embedded Post-Processing for
+ * Enhancement of Compressed Images (1999)"
+ * (http://citeseer.nj.nec.com/nosratinia99embedded.html)
+ * Further, with splitting (i)dct into hor/ver passes, one of them can be
+ * performed once per block, not pixel. This allows for much better speed.
+ */
+
+/*
+ Heavily optimized version of SPP filter by Nikolaj
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <math.h>
+
+#include "config.h"
+
+#include "mp_msg.h"
+#include "cpudetect.h"
+#include "img_format.h"
+#include "mp_image.h"
+#include "vf.h"
+#include "av_helpers.h"
+#include "libvo/fastmemcpy.h"
+
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h"
+#include "libavcodec/avcodec.h"
+
+#undef free
+#undef malloc
+
+//===========================================================================//
+#define BLOCKSZ 12
+
+/* 8x8 table of threshold values, one per coefficient position of a block. */
+static const short custom_threshold[64]=
+// values (296) can't be too high
+// -it causes too big quant dependence
+// or maybe overflow(check), which results in some flashing
+{ 71, 296, 295, 237, 71, 40, 38, 19,
+ 245, 193, 185, 121, 102, 73, 53, 27,
+ 158, 129, 141, 107, 97, 73, 50, 26,
+ 102, 116, 109, 98, 82, 66, 45, 23,
+ 71, 94, 95, 81, 70, 56, 38, 20,
+ 56, 77, 74, 66, 56, 44, 30, 15,
+ 38, 53, 50, 45, 38, 30, 21, 11,
+ 20, 27, 26, 23, 20, 15, 11, 5
+};
+
+/*
+ * 8x8 dither matrix holding each value 0..63 once. The store_slice routines
+ * add row y of it (shifted right by log2_scale) to output row y before the
+ * final down-shift.
+ */
+DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
+ { 0, 48, 12, 60, 3, 51, 15, 63, },
+ { 32, 16, 44, 28, 35, 19, 47, 31, },
+ { 8, 56, 4, 52, 11, 59, 7, 55, },
+ { 40, 24, 36, 20, 43, 27, 39, 23, },
+ { 2, 50, 14, 62, 1, 49, 13, 61, },
+ { 34, 18, 46, 30, 33, 17, 45, 29, },
+ { 10, 58, 6, 54, 9, 57, 5, 53, },
+ { 42, 26, 38, 22, 41, 25, 37, 21, },
+};
+
+/*
+ * Per-instance state of the "fspp" filter.
+ * The two threshold matrices must stay first and adjacent: mul_thrmat_mmx()
+ * reaches threshold_mtx at a fixed byte offset from threshold_mtx_noq.
+ */
+struct vf_priv_s { //align 16 !
+ uint64_t threshold_mtx_noq[8*2]; // 64 int16 thresholds before quantizer scaling
+ uint64_t threshold_mtx[8*2];//used in both C & MMX (& later SSE2) versions
+
+ int log2_count; // filter() uses step = 6 - log2_count and output scale 5 - log2_count
+ int temp_stride; // row stride of temp/src, set in config() (width + 16 rounded up to 16)
+ int qp; // non-zero: filter() keeps threshold_mtx as is instead of following the per-block quantizer
+ int mpeg2; // quantizer type passed to norm_qscale(); put_image() copies it from mpi->qscale_type
+ int prev_q; // quantizer threshold_mtx was last scaled for
+ uint8_t *src; // copy of the input plane with a mirrored 8 pixel border (allocated in config())
+ int16_t *temp; // int16 accumulation buffer, 3*8 rows of temp_stride (allocated in config())
+ int bframes; // NOTE(review): not used in the visible part of the file
+ char *non_b_qp; // NOTE(review): not used in the visible part of the file
+};
+
+
+#if !HAVE_MMX
+
+//This func reads from one slice (at src) and clears it together with the
+//slice 8 rows above it (src - 8*src_stride).
+//Each accumulated value gets the dither for its row/column added, is shifted
+//down by 6-log2_scale, clamped to 0..255 and written to dst.
+//Pixels are processed in groups of 8, so up to 7 columns past width are touched.
+//NOTE(review): the STORE macro stays defined for the rest of the file.
+static void store_slice_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale)
+{int y, x;
+#define STORE(pos) \
+ temp= (src[x + pos] + (d[pos]>>log2_scale))>>(6-log2_scale); \
+ src[x + pos]=src[x + pos - 8*src_stride]=0; \
+ if(temp & 0x100) temp= ~(temp>>31); \
+ dst[x + pos]= temp;
+
+ for(y=0; y<height; y++){
+ // dither has 8 rows, so height must not exceed 8
+ const uint8_t *d= dither[y];
+ for(x=0; x<width; x+=8){
+ int temp;
+ STORE(0);
+ STORE(1);
+ STORE(2);
+ STORE(3);
+ STORE(4);
+ STORE(5);
+ STORE(6);
+ STORE(7);
+ }
+ src+=src_stride;
+ dst+=dst_stride;
+ }
+}
+
+//This func reads from 2 slices (at src and 16 rows below it), sums them and
+//clears only the second one.
+//The sum gets the dither for its row/column added, is shifted down by
+//6-log2_scale, clamped to 0..255 and written to dst.
+//Pixels are processed in groups of 8, so up to 7 columns past width are touched.
+//NOTE(review): the STORE2 macro stays defined for the rest of the file.
+static void store_slice2_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale)
+{int y, x;
+#define STORE2(pos) \
+ temp= (src[x + pos] + src[x + pos + 16*src_stride] + (d[pos]>>log2_scale))>>(6-log2_scale); \
+ src[x + pos + 16*src_stride]=0; \
+ if(temp & 0x100) temp= ~(temp>>31); \
+ dst[x + pos]= temp;
+
+ for(y=0; y<height; y++){
+ // dither has 8 rows, so height must not exceed 8
+ const uint8_t *d= dither[y];
+ for(x=0; x<width; x+=8){
+ int temp;
+ STORE2(0);
+ STORE2(1);
+ STORE2(2);
+ STORE2(3);
+ STORE2(4);
+ STORE2(5);
+ STORE2(6);
+ STORE2(7);
+ }
+ src+=src_stride;
+ dst+=dst_stride;
+ }
+}
+
+/**
+ * Scale the unquantized threshold matrix by a quantizer:
+ * threshold_mtx[i] = q * threshold_mtx_noq[i] for all 64 entries.
+ *
+ * @param p filter state holding both matrices
+ * @param q quantizer to multiply by
+ */
+static void mul_thrmat_c(struct vf_priv_s *p,int q)
+{
+    const short *unscaled = (const short*)p->threshold_mtx_noq;
+    short *scaled = (short*)p->threshold_mtx;
+    int idx;
+
+    for (idx = 0; idx < 64; idx++) {
+        scaled[idx] = q * unscaled[idx];//ints faster in C
+    }
+}
+
+static void column_fidct_c(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt);
+static void row_idct_c(int16_t* workspace,
+ int16_t* output_adr, int output_stride, int cnt);
+static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int cnt);
+
+//this is rather ugly, but there is no need for function pointers
+#define store_slice_s store_slice_c
+#define store_slice2_s store_slice2_c
+#define mul_thrmat_s mul_thrmat_c
+#define column_fidct_s column_fidct_c
+#define row_idct_s row_idct_c
+#define row_fdct_s row_fdct_c
+
+#else /* HAVE_MMX */
+
+//MMX counterpart of store_slice_c: reads one slice, clears it and the slice
+//8 rows above it, and writes the dithered, down-shifted, saturated bytes to dst.
+//Rows are walked via the dither table: one dither row per output row, from
+//dither[0] up to (not including) dither[height].
+//NOTE(review): the asm stores to %1 (src_stride) although it is declared as
+//an input operand, and declares no "memory" or MMX register clobbers.
+static void store_slice_mmx(uint8_t *dst, int16_t *src, long dst_stride, long src_stride, long width, long height, long log2_scale)
+{
+ const uint8_t *od=&dither[0][0];
+ const uint8_t *end=&dither[height][0];
+ // the inner loop handles 8 pixels at a time, so round width up
+ width = (width+7)&~7;
+ // dst_stride becomes the gap between the end of one row and the start of the next
+ dst_stride-=width;
+ //src_stride=(src_stride-width)*2;
+ __asm__ volatile(
+ "mov %5, %%"REG_d" \n\t"
+ "mov %6, %%"REG_S" \n\t"
+ "mov %7, %%"REG_D" \n\t"
+ "mov %1, %%"REG_a" \n\t"
+ /* mm5 = log2_scale (dither shift) */
+ "movd %%"REG_d", %%mm5 \n\t"
+ /* ~log2_scale + 7 == 6 - log2_scale, kept in mm2 as the final shift */
+ "xor $-1, %%"REG_d" \n\t"
+ "mov %%"REG_a", %%"REG_c" \n\t"
+ "add $7, %%"REG_d" \n\t"
+ "neg %%"REG_a" \n\t"
+ /* (src_stride - width) * 2 = bytes to skip at the end of each src row */
+ "sub %0, %%"REG_c" \n\t"
+ "add %%"REG_c", %%"REG_c" \n\t"
+ "movd %%"REG_d", %%mm2 \n\t"
+ "mov %%"REG_c", %1 \n\t"
+ "mov %2, %%"REG_d" \n\t"
+ /* REG_a = -16*src_stride bytes, i.e. 8 rows of int16 above the current one */
+ "shl $4, %%"REG_a" \n\t"
+
+ /* per-row setup: load this row's 8 dither bytes, widen and pre-shift them */
+ "2: \n\t"
+ "movq (%%"REG_d"), %%mm3 \n\t"
+ "movq %%mm3, %%mm4 \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+ "punpcklbw %%mm7, %%mm3 \n\t"
+ "punpckhbw %%mm7, %%mm4 \n\t"
+ "mov %0, %%"REG_c" \n\t"
+ "psraw %%mm5, %%mm3 \n\t"
+ "psraw %%mm5, %%mm4 \n\t"
+ /* per 8 pixels: load, zero both slices, add dither, shift, pack, store */
+ "1: \n\t"
+ "movq %%mm7, (%%"REG_S",%%"REG_a") \n\t"
+ "movq (%%"REG_S"), %%mm0 \n\t"
+ "movq 8(%%"REG_S"), %%mm1 \n\t"
+
+ "movq %%mm7, 8(%%"REG_S",%%"REG_a") \n\t"
+ "paddw %%mm3, %%mm0 \n\t"
+ "paddw %%mm4, %%mm1 \n\t"
+
+ "movq %%mm7, (%%"REG_S") \n\t"
+ "psraw %%mm2, %%mm0 \n\t"
+ "psraw %%mm2, %%mm1 \n\t"
+
+ "movq %%mm7, 8(%%"REG_S") \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "add $16, %%"REG_S" \n\t"
+
+ "movq %%mm0, (%%"REG_D") \n\t"
+ "add $8, %%"REG_D" \n\t"
+ "sub $8, %%"REG_c" \n\t"
+ "jg 1b \n\t"
+ /* advance src, dither and dst to the next row; stop at the end dither row */
+ "add %1, %%"REG_S" \n\t"
+ "add $8, %%"REG_d" \n\t"
+ "add %3, %%"REG_D" \n\t"
+ "cmp %4, %%"REG_d" \n\t"
+ "jl 2b \n\t"
+
+ :
+ : "m" (width), "m" (src_stride), "erm" (od), "m" (dst_stride), "erm" (end),
+ "m" (log2_scale), "m" (src), "m" (dst) //input
+ : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+ );
+}
+
+//MMX counterpart of store_slice2_c: sums the slice at src with the one 16 rows
+//below it, clears only the second one, and writes the dithered, down-shifted,
+//saturated bytes to dst.
+//Rows are walked via the dither table: one dither row per output row, from
+//dither[0] up to (not including) dither[height].
+//NOTE(review): the asm stores to %1 (src_stride) although it is declared as
+//an input operand, and declares no "memory" or MMX register clobbers.
+static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long src_stride, long width, long height, long log2_scale)
+{
+ const uint8_t *od=&dither[0][0];
+ const uint8_t *end=&dither[height][0];
+ // the inner loop handles 8 pixels at a time, so round width up
+ width = (width+7)&~7;
+ // dst_stride becomes the gap between the end of one row and the start of the next
+ dst_stride-=width;
+ //src_stride=(src_stride-width)*2;
+ __asm__ volatile(
+ "mov %5, %%"REG_d" \n\t"
+ "mov %6, %%"REG_S" \n\t"
+ "mov %7, %%"REG_D" \n\t"
+ "mov %1, %%"REG_a" \n\t"
+ /* mm5 = log2_scale (dither shift) */
+ "movd %%"REG_d", %%mm5 \n\t"
+ /* ~log2_scale + 7 == 6 - log2_scale, kept in mm2 as the final shift */
+ "xor $-1, %%"REG_d" \n\t"
+ "mov %%"REG_a", %%"REG_c" \n\t"
+ "add $7, %%"REG_d" \n\t"
+ /* (src_stride - width) * 2 = bytes to skip at the end of each src row */
+ "sub %0, %%"REG_c" \n\t"
+ "add %%"REG_c", %%"REG_c" \n\t"
+ "movd %%"REG_d", %%mm2 \n\t"
+ "mov %%"REG_c", %1 \n\t"
+ "mov %2, %%"REG_d" \n\t"
+ /* REG_a = 32*src_stride bytes, i.e. 16 rows of int16 below the current one */
+ "shl $5, %%"REG_a" \n\t"
+
+ /* per-row setup: load this row's 8 dither bytes, widen and pre-shift them */
+ "2: \n\t"
+ "movq (%%"REG_d"), %%mm3 \n\t"
+ "movq %%mm3, %%mm4 \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+ "punpcklbw %%mm7, %%mm3 \n\t"
+ "punpckhbw %%mm7, %%mm4 \n\t"
+ "mov %0, %%"REG_c" \n\t"
+ "psraw %%mm5, %%mm3 \n\t"
+ "psraw %%mm5, %%mm4 \n\t"
+ /* per 8 pixels: add both slices and dither, zero the second slice, shift, pack, store */
+ "1: \n\t"
+ "movq (%%"REG_S"), %%mm0 \n\t"
+ "movq 8(%%"REG_S"), %%mm1 \n\t"
+ "paddw %%mm3, %%mm0 \n\t"
+
+ "paddw (%%"REG_S",%%"REG_a"), %%mm0 \n\t"
+ "paddw %%mm4, %%mm1 \n\t"
+ "movq 8(%%"REG_S",%%"REG_a"), %%mm6 \n\t"
+
+ "movq %%mm7, (%%"REG_S",%%"REG_a") \n\t"
+ "psraw %%mm2, %%mm0 \n\t"
+ "paddw %%mm6, %%mm1 \n\t"
+
+ "movq %%mm7, 8(%%"REG_S",%%"REG_a") \n\t"
+ "psraw %%mm2, %%mm1 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+
+ "movq %%mm0, (%%"REG_D") \n\t"
+ "add $16, %%"REG_S" \n\t"
+ "add $8, %%"REG_D" \n\t"
+ "sub $8, %%"REG_c" \n\t"
+ "jg 1b \n\t"
+ /* advance src, dither and dst to the next row; stop at the end dither row */
+ "add %1, %%"REG_S" \n\t"
+ "add $8, %%"REG_d" \n\t"
+ "add %3, %%"REG_D" \n\t"
+ "cmp %4, %%"REG_d" \n\t"
+ "jl 2b \n\t"
+
+ :
+ : "m" (width), "m" (src_stride), "erm" (od), "m" (dst_stride), "erm" (end),
+ "m" (log2_scale), "m" (src), "m" (dst) //input
+ : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_D, "%"REG_S
+ );
+}
+
+/*
+ * MMX counterpart of mul_thrmat_c: multiplies the 64 int16 entries of
+ * p->threshold_mtx_noq by q and stores them in p->threshold_mtx.
+ *
+ * Both the source (REG_S) and destination (REG_D) registers start at
+ * threshold_mtx_noq; the destination is then advanced by 8*8*2 bytes, which
+ * is where threshold_mtx sits because it directly follows threshold_mtx_noq
+ * in struct vf_priv_s. The 16 quadwords are addressed as 0..6, 7+0..7+6 and
+ * 14+0..14+1.
+ * NOTE(review): no "memory" or MMX register clobbers are declared.
+ */
+static void mul_thrmat_mmx(struct vf_priv_s *p, int q)
+{
+ uint64_t *adr=&p->threshold_mtx_noq[0];
+ __asm__ volatile(
+ /* broadcast the low 16 bits of q into all four words of mm7 */
+ "movd %0, %%mm7 \n\t"
+ "add $8*8*2, %%"REG_D" \n\t"
+ "movq 0*8(%%"REG_S"), %%mm0 \n\t"
+ "punpcklwd %%mm7, %%mm7 \n\t"
+ "movq 1*8(%%"REG_S"), %%mm1 \n\t"
+ "punpckldq %%mm7, %%mm7 \n\t"
+ "pmullw %%mm7, %%mm0 \n\t"
+
+ "movq 2*8(%%"REG_S"), %%mm2 \n\t"
+ "pmullw %%mm7, %%mm1 \n\t"
+
+ "movq 3*8(%%"REG_S"), %%mm3 \n\t"
+ "pmullw %%mm7, %%mm2 \n\t"
+
+ "movq %%mm0, 0*8(%%"REG_D") \n\t"
+ "movq 4*8(%%"REG_S"), %%mm4 \n\t"
+ "pmullw %%mm7, %%mm3 \n\t"
+
+ "movq %%mm1, 1*8(%%"REG_D") \n\t"
+ "movq 5*8(%%"REG_S"), %%mm5 \n\t"
+ "pmullw %%mm7, %%mm4 \n\t"
+
+ "movq %%mm2, 2*8(%%"REG_D") \n\t"
+ "movq 6*8(%%"REG_S"), %%mm6 \n\t"
+ "pmullw %%mm7, %%mm5 \n\t"
+
+ "movq %%mm3, 3*8(%%"REG_D") \n\t"
+ "movq 7*8+0*8(%%"REG_S"), %%mm0 \n\t"
+ "pmullw %%mm7, %%mm6 \n\t"
+
+ "movq %%mm4, 4*8(%%"REG_D") \n\t"
+ "movq 7*8+1*8(%%"REG_S"), %%mm1 \n\t"
+ "pmullw %%mm7, %%mm0 \n\t"
+
+ "movq %%mm5, 5*8(%%"REG_D") \n\t"
+ "movq 7*8+2*8(%%"REG_S"), %%mm2 \n\t"
+ "pmullw %%mm7, %%mm1 \n\t"
+
+ "movq %%mm6, 6*8(%%"REG_D") \n\t"
+ "movq 7*8+3*8(%%"REG_S"), %%mm3 \n\t"
+ "pmullw %%mm7, %%mm2 \n\t"
+
+ "movq %%mm0, 7*8+0*8(%%"REG_D") \n\t"
+ "movq 7*8+4*8(%%"REG_S"), %%mm4 \n\t"
+ "pmullw %%mm7, %%mm3 \n\t"
+
+ "movq %%mm1, 7*8+1*8(%%"REG_D") \n\t"
+ "movq 7*8+5*8(%%"REG_S"), %%mm5 \n\t"
+ "pmullw %%mm7, %%mm4 \n\t"
+
+ "movq %%mm2, 7*8+2*8(%%"REG_D") \n\t"
+ "movq 7*8+6*8(%%"REG_S"), %%mm6 \n\t"
+ "pmullw %%mm7, %%mm5 \n\t"
+
+ "movq %%mm3, 7*8+3*8(%%"REG_D") \n\t"
+ "movq 14*8+0*8(%%"REG_S"), %%mm0 \n\t"
+ "pmullw %%mm7, %%mm6 \n\t"
+
+ "movq %%mm4, 7*8+4*8(%%"REG_D") \n\t"
+ "movq 14*8+1*8(%%"REG_S"), %%mm1 \n\t"
+ "pmullw %%mm7, %%mm0 \n\t"
+
+ "movq %%mm5, 7*8+5*8(%%"REG_D") \n\t"
+ "pmullw %%mm7, %%mm1 \n\t"
+
+ "movq %%mm6, 7*8+6*8(%%"REG_D") \n\t"
+ "movq %%mm0, 14*8+0*8(%%"REG_D") \n\t"
+ "movq %%mm1, 14*8+1*8(%%"REG_D") \n\t"
+
+ : "+g" (q), "+S" (adr), "+D" (adr)
+ :
+ );
+}
+
+static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt);
+static void row_idct_mmx(int16_t* workspace,
+ int16_t* output_adr, int output_stride, int cnt);
+static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, int cnt);
+
+#define store_slice_s store_slice_mmx
+#define store_slice2_s store_slice2_mmx
+#define mul_thrmat_s mul_thrmat_mmx
+#define column_fidct_s column_fidct_mmx
+#define row_idct_s row_idct_mmx
+#define row_fdct_s row_fdct_mmx
+#endif // HAVE_MMX
+
+/**
+ * Run the postprocessing filter over one plane.
+ *
+ * The plane is copied into p->src with an 8 pixel mirrored border. It is then
+ * walked in rows of step lines: row_fdct_s() fills the block buffer,
+ * column_fidct_s() applies the threshold matrix, and row_idct_s() accumulates
+ * the result into p->temp. Finished 8-line slices are written to dst by
+ * store_slice_s() / store_slice2_s().
+ *
+ * @param p          filter state (src/temp buffers, thresholds, settings)
+ * @param dst        destination plane
+ * @param src        source plane
+ * @param dst_stride bytes between the starts of two destination rows
+ * @param src_stride bytes between the starts of two source rows
+ * @param width      plane width in pixels
+ * @param height     plane height in pixels
+ * @param qp_store   per-block quantizer table, read only when p->qp is 0
+ * @param qp_stride  row stride of qp_store
+ * @param is_luma    non-zero for the luma plane; selects the stride and the
+ *                   shift used to index qp_store
+ */
+static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src,
+ int dst_stride, int src_stride,
+ int width, int height,
+ uint8_t *qp_store, int qp_stride, int is_luma)
+{
+ int x, x0, y, es, qy, t;
+ const int stride= is_luma ? p->temp_stride : (width+16);//((width+16+15)&(~15))
+ const int step=6-p->log2_count;
+ // pixel coordinates are shifted right by qps to index qp_store
+ const int qps= 3 + is_luma;
+ // one int32 array split in two halves, both used as int16 buffers
+ DECLARE_ALIGNED(32, int32_t, block_align)[4*8*BLOCKSZ+ 4*8*BLOCKSZ];
+ int16_t *block= (int16_t *)block_align;
+ int16_t *block3=(int16_t *)(block_align+4*8*BLOCKSZ);
+
+ // NOTE(review): this clears 4*8*BLOCKSZ bytes, a quarter of the block3 half - confirm that is intended
+ memset(block3, 0, 4*8*BLOCKSZ);
+
+ //p->src=src-src_stride*8-8;//!
+ if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
+ // copy each row into p->src at offset (8,8) and mirror 8 pixels on both sides
+ for(y=0; y<height; y++){
+ int index= 8 + 8*stride + y*stride;
+ fast_memcpy(p->src + index, src + y*src_stride, width);//this line can be avoided by using DR & user fr.buffers
+ for(x=0; x<8; x++){
+ p->src[index - x - 1]= p->src[index + x ];
+ p->src[index + width + x ]= p->src[index + width - x - 1];
+ }
+ }
+ // mirror 8 rows above and below the picture
+ for(y=0; y<8; y++){
+ fast_memcpy(p->src + ( 7-y)*stride, p->src + ( y+8)*stride, stride);
+ fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
+ }
+ //FIXME (try edge emu)
+
+ // clear rows 8..23 of the accumulation buffer
+ for(y=8; y<24; y++)
+ memset(p->temp+ 8 +y*stride, 0,width*sizeof(int16_t));
+
+ for(y=step; y<height+8; y+=step){ //step= 1,2
+ // row of qp_store for this y, clamped to the picture
+ qy=y-4;
+ if (qy>height-1) qy=height-1;
+ if (qy<0) qy=0;
+ qy=(qy>>qps)*qp_stride;
+ row_fdct_s(block, p->src + y*stride +2-(y&1), stride, 2);
+ // process the row in chunks of BLOCKSZ-1 blocks
+ for(x0=0; x0<width+8-8*(BLOCKSZ-1); x0+=8*(BLOCKSZ-1)){
+ row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, 2*(BLOCKSZ-1));
+ if(p->qp)
+ column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+0*8, block3+0*8, 8*(BLOCKSZ-1)); //yes, this is a HOTSPOT
+ else
+ // per 8 columns: fetch the quantizer and rescale the thresholds when it changes
+ for (x=0; x<8*(BLOCKSZ-1); x+=8) {
+ t=x+x0-2; //correct t=x+x0-2-(y&1), but its the same
+ if (t<0) t=0;//t always < width-2
+ t=qp_store[qy+(t>>qps)];
+ t=norm_qscale(t, p->mpeg2);
+ if (t!=p->prev_q) p->prev_q=t, mul_thrmat_s(p, t);
+ column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+x*8, block3+x*8, 8); //yes, this is a HOTSPOT
+ }
+ row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, 2*(BLOCKSZ-1));
+ // carry the last block over to the front for the next chunk
+ memmove(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(int16_t)); //cycling
+ memmove(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(int16_t));
+ }
+ //
+ // remaining columns of the row
+ es=width+8-x0; // 8, ...
+ if (es>8)
+ row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, (es-4)>>2);
+ column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block, block3, es&(~1));
+ row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, es>>2);
+ // every 8 lines a finished slice is written out, alternating between the two store routines
+ {const int y1=y-8+step;//l5-7 l4-6
+ if (!(y1&7) && y1) {
+ if (y1&8) store_slice_s(dst + (y1-8)*dst_stride, p->temp+ 8 +8*stride,
+ dst_stride, stride, width, 8, 5-p->log2_count);
+ else store_slice2_s(dst + (y1-8)*dst_stride, p->temp+ 8 +0*stride,
+ dst_stride, stride, width, 8, 5-p->log2_count);
+ } }
+ }
+
+ // write the last, partial slice
+ if (y&7) { // == height & 7
+ if (y&8) store_slice_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +8*stride,
+ dst_stride, stride, width, y&7, 5-p->log2_count);
+ else store_slice2_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +0*stride,
+ dst_stride, stride, width, y&7, 5-p->log2_count);
+ }
+}
+
+ /**
+  * Size and allocate the per-instance work buffers, then hand the
+  * configuration on to the next filter in the chain.
+  *
+  * temp_stride is width+16 rounded up to a multiple of 16. temp is a zeroed
+  * int16_t buffer of 3*8 rows at that stride; src is a byte buffer of
+  * (height+16 rounded up to a multiple of 16) rows at that stride.
+  *
+  * @return 0 when a buffer cannot be allocated, otherwise whatever
+  *         ff_vf_next_config() returns.
+  */
+ static int config(struct vf_instance *vf,
+                   int width, int height, int d_width, int d_height,
+                   unsigned int flags, unsigned int outfmt)
+ {
+     int h= (height+16+15)&(~15);
+
+     // If config() is invoked again on the same instance, release the buffers
+     // of the previous configuration instead of leaking them. av_free(NULL) is
+     // a no-op, so this is also fine on the first call (priv is zero-filled).
+     av_free(vf->priv->temp);
+     vf->priv->temp= NULL;
+     av_free(vf->priv->src);
+     vf->priv->src= NULL;
+
+     vf->priv->temp_stride= (width+16+15)&(~15);
+     vf->priv->temp= (int16_t*)av_mallocz(vf->priv->temp_stride*3*8*sizeof(int16_t));
+     //this can also be avoided, see above
+     vf->priv->src = (uint8_t*)av_malloc(vf->priv->temp_stride*h*sizeof(uint8_t));
+
+     // Both buffers are dereferenced unconditionally while filtering, so a
+     // failed allocation must stop the configuration here.
+     // NOTE(review): 0 is assumed to be the failure value of this callback
+     // (vf_open() reports success with 1) -- confirm against vf.h.
+     if(!vf->priv->temp || !vf->priv->src){
+         av_free(vf->priv->temp);
+         vf->priv->temp= NULL;
+         av_free(vf->priv->src);
+         vf->priv->src= NULL;
+         return 0;
+     }
+
+     return ff_vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
+ }
+
+ /**
+  * Direct-rendering hook: unless the caller asked for the image to be
+  * preserved, fetch a buffer from the next filter, remember it in vf->dmpi
+  * and point mpi's planes and strides (plus its width) at it, then flag mpi
+  * as directly rendered.
+  */
+ static void get_image(struct vf_instance *vf, mp_image_t *mpi)
+ {
+     mp_image_t *next_img;
+     int plane;
+     int plane_count;
+
+     if(mpi->flags&MP_IMGFLAG_PRESERVE)
+         return; // the image must stay untouched, so no in-place processing
+
+     next_img= ff_vf_get_image(vf->next, mpi->imgfmt,
+                               mpi->type, mpi->flags, mpi->width, mpi->height);
+     vf->dmpi= next_img;
+
+     // plane 0 is always redirected, planes 1 and 2 only for planar images
+     plane_count= (mpi->flags&MP_IMGFLAG_PLANAR) ? 3 : 1;
+     for(plane=0; plane<plane_count; plane++){
+         mpi->planes[plane]= next_img->planes[plane];
+         mpi->stride[plane]= next_img->stride[plane];
+     }
+     mpi->width= next_img->width;
+
+     mpi->flags|= MP_IMGFLAG_DIRECT;
+ }
+
+ /**
+  * Post-process one frame into the output image and pass it down the chain.
+  *
+  * When the frame was not rendered directly into a buffer of the next filter
+  * (MP_IMGFLAG_DIRECT unset) a temporary output image is requested first.
+  * The quantizer table of frames whose pict_type is not 3 is copied to
+  * non_b_qp (unless a fixed qp is configured) so that later frames can reuse
+  * it. Planes are filtered when a quantizer table or a fixed qp is available
+  * and copied unchanged otherwise.
+  *
+  * @return the result of ff_vf_next_put_image(), or 0 if no output image
+  *         could be obtained.
+  */
+ static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
+ {
+     mp_image_t *dmpi;
+     if(!(mpi->flags&MP_IMGFLAG_DIRECT)){
+         // no DR, so get a new image! hope we'll get DR buffer:
+         dmpi=ff_vf_get_image(vf->next,mpi->imgfmt,
+                              MP_IMGTYPE_TEMP,
+                              MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE,
+                              mpi->width,mpi->height);
+         // dmpi is dereferenced right below; bail out rather than crash.
+         // NOTE(review): 0 is assumed to mean "no frame output" -- confirm.
+         if(!dmpi) return 0;
+         ff_vf_clone_mpi_attributes(dmpi, mpi);
+     }else{
+         dmpi=vf->dmpi;
+     }
+
+     vf->priv->mpeg2= mpi->qscale_type;
+     // pict_type 3 is presumably a B-frame (going by the name non_b_qp).
+     if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){
+         int w = mpi->qstride;
+         int h = (mpi->h + 15) >> 4;
+         void *qp_copy = NULL;
+         if (!w) {
+             w = (mpi->w + 15) >> 4;
+             h = 1;
+         }
+         // Re-size the copy for every frame: the table size follows the frame
+         // size, so a buffer sized once for the first frame could be
+         // overrun by a later, larger table.
+         if(w > 0 && h > 0)
+             qp_copy= realloc(vf->priv->non_b_qp, (size_t)w*h);
+         if(qp_copy){
+             vf->priv->non_b_qp= qp_copy;
+             fast_memcpy(vf->priv->non_b_qp, mpi->qscale, (size_t)w*h);
+         }else{
+             // No usable copy: drop the stale one so that the code below
+             // falls back to the table of the current frame.
+             free(vf->priv->non_b_qp);
+             vf->priv->non_b_qp= NULL;
+         }
+     }
+     if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){
+         char *qp_tab= vf->priv->non_b_qp;
+         // use the frame's own table when bframes is set or no copy exists
+         if(vf->priv->bframes || !qp_tab)
+             qp_tab= mpi->qscale;
+
+         if(qp_tab || vf->priv->qp){
+             // last argument: 1 for the first plane, 0 for the other two
+             filter(vf->priv, dmpi->planes[0], mpi->planes[0], dmpi->stride[0], mpi->stride[0],
+                    mpi->w, mpi->h, qp_tab, mpi->qstride, 1);
+             filter(vf->priv, dmpi->planes[1], mpi->planes[1], dmpi->stride[1], mpi->stride[1],
+                    mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0);
+             filter(vf->priv, dmpi->planes[2], mpi->planes[2], dmpi->stride[2], mpi->stride[2],
+                    mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0);
+         }else{
+             // nothing to derive thresholds from: plain copy of all planes
+             memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]);
+             memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]);
+             memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]);
+         }
+     }
+
+ #if HAVE_MMX
+     if(ff_gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
+ #endif
+ #if HAVE_MMX2
+     if(ff_gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
+ #endif
+     return ff_vf_next_put_image(vf,dmpi, pts);
+ }
+
+ /**
+  * Release everything owned by the filter instance: the two av_malloc'ed
+  * work buffers, the malloc'ed quantizer-table copy and finally the private
+  * state itself. Does nothing when the instance has no private state.
+  */
+ static void uninit(struct vf_instance *vf)
+ {
+     struct vf_priv_s *priv= vf->priv;
+
+     if(priv){
+         av_free(priv->temp);
+         priv->temp= NULL;
+         av_free(priv->src);
+         priv->src= NULL;
+
+         // non_b_qp comes from the C allocator, hence free() and not av_free()
+         free(priv->non_b_qp);
+         priv->non_b_qp= NULL;
+
+         av_free(priv);
+         vf->priv= NULL;
+     }
+ }
+
+//===========================================================================//
+
+ /**
+  * Report whether a pixel format can be handled: formats on the list below
+  * are passed on to the next filter's query, every other format yields 0.
+  */
+ static int query_format(struct vf_instance *vf, unsigned int fmt)
+ {
+     static const unsigned int accepted[]= {
+         IMGFMT_YVU9, IMGFMT_IF09, IMGFMT_YV12, IMGFMT_I420,
+         IMGFMT_IYUV, IMGFMT_CLPL, IMGFMT_Y800, IMGFMT_Y8,
+         IMGFMT_444P, IMGFMT_422P, IMGFMT_411P,
+     };
+     size_t idx;
+
+     for(idx=0; idx<sizeof(accepted)/sizeof(accepted[0]); idx++){
+         if(accepted[idx] == fmt)
+             return ff_vf_next_query_format(vf,fmt);
+     }
+     return 0;
+ }
+
+ /**
+  * Handle filter control requests.
+  *
+  * VFCTRL_QUERY_MAX_PP_LEVEL reports 5. VFCTRL_SET_PP_LEVEL reads an
+  * unsigned int from data and stores it as log2_count, clamped to [4, 5].
+  * Every other request is forwarded to the next filter.
+  */
+ static int control(struct vf_instance *vf, int request, void* data)
+ {
+     switch(request){
+     case VFCTRL_QUERY_MAX_PP_LEVEL:
+         return 5;
+     case VFCTRL_SET_PP_LEVEL:
+         vf->priv->log2_count= *((unsigned int*)data);
+         if (vf->priv->log2_count < 4) vf->priv->log2_count=4;
+         // Keep the level within the advertised maximum: the filter passes
+         // 5-log2_count on as a shift amount, which must not go negative.
+         // vf_open() applies the same cap.
+         if (vf->priv->log2_count > 5) vf->priv->log2_count=5;
+         return CONTROL_TRUE;
+     default:
+         break;
+     }
+     return ff_vf_next_control(vf,request,data);
+ }
+
+ /**
+  * Set up a new fspp filter instance: install the callbacks, allocate the
+  * private state, parse the options and precompute the threshold matrix.
+  *
+  * args, when given, is parsed as four colon-separated ints,
+  * "log2_count:qp:bias:bframes":
+  *  - log2_count: 4 or 5 is used as is, 6 and above is capped to 5, anything
+  *    else keeps the default of 4;
+  *  - qp: negative values are reset to 0; a non-zero value fixes the
+  *    threshold matrix right here via mul_thrmat_s();
+  *  - bias: clamped to [-15, 32] and added to the base value 16;
+  *  - bframes: stored unchanged.
+  *
+  * @return 1 on success, 0 if the private state cannot be allocated.
+  */
+ static int vf_open(vf_instance_t *vf, char *args)
+ {
+     int i=0, bias;
+     int custom_threshold_m[64];
+     int log2c=-1;
+
+     vf->config=config;
+     vf->put_image=put_image;
+     vf->get_image=get_image;
+     vf->query_format=query_format;
+     vf->uninit=uninit;
+     vf->control= control;
+     vf->priv=av_mallocz(sizeof(struct vf_priv_s));//assumes align 16 !
+     // Everything below dereferences vf->priv; uninit() copes with NULL.
+     if(!vf->priv)
+         return 0;
+
+     ff_init_avcodec();
+
+     //vf->priv->avctx= avcodec_alloc_context();
+     //dsputil_init(&vf->priv->dsp, vf->priv->avctx);
+
+     vf->priv->log2_count= 4;
+     vf->priv->bframes = 0;
+
+     // Fields not matched by sscanf keep the defaults set above / by mallocz.
+     if (args) sscanf(args, "%d:%d:%d:%d", &log2c, &vf->priv->qp, &i, &vf->priv->bframes);
+
+     if( log2c >=4 && log2c <=5 )
+         vf->priv->log2_count = log2c;
+     else if( log2c >= 6 )
+         vf->priv->log2_count = 5;
+
+     if(vf->priv->qp < 0)
+         vf->priv->qp = 0;
+
+     if (i < -15) i = -15;
+     if (i > 32) i = 32;
+
+     bias= (1<<4)+i; //regulable
+     vf->priv->prev_q=0;
+     //
+     for(i=0;i<64;i++) //FIXME: tune custom_threshold[] and remove this !
+         custom_threshold_m[i]=(int)(custom_threshold[i]*(bias/71.)+ 0.5);
+     // Pack each row of 8 scaled thresholds into two 64-bit words of four
+     // 16-bit fields, in the column order 2,6,0,4 and 5,3,1,7.
+     for(i=0;i<8;i++){
+         vf->priv->threshold_mtx_noq[2*i]=(uint64_t)custom_threshold_m[i*8+2]
+             |(((uint64_t)custom_threshold_m[i*8+6])<<16)
+             |(((uint64_t)custom_threshold_m[i*8+0])<<32)
+             |(((uint64_t)custom_threshold_m[i*8+4])<<48);
+         vf->priv->threshold_mtx_noq[2*i+1]=(uint64_t)custom_threshold_m[i*8+5]
+             |(((uint64_t)custom_threshold_m[i*8+3])<<16)
+             |(((uint64_t)custom_threshold_m[i*8+1])<<32)
+             |(((uint64_t)custom_threshold_m[i*8+7])<<48);
+     }
+
+     // With a fixed qp the thresholds never change, so scale them once here.
+     if (vf->priv->qp) vf->priv->prev_q=vf->priv->qp, mul_thrmat_s(vf->priv, vf->priv->qp);
+
+     return 1;
+ }
+
+ const vf_info_t ff_vf_info_fspp = { // descriptor for this filter; fields are positional (vf_info_t is declared elsewhere)
+ "fast simple postprocess", // presumably the human-readable description -- confirm against vf_info_t
+ "fspp", // presumably the short filter name -- confirm against vf_info_t
+ "Michael Niedermayer, Nikolaj Poroshin", // author credits
+ "", // left empty
+ vf_open, // instance set-up function of this file
+ NULL // nothing supplied for the last field
+ };
+
+//====================================================================
+//Specific spp's dct, idct and threshold functions
+ //I'd prefer to have them in a separate file.
+
+//#define MANGLE(a) #a
+
+//typedef int16_t int16_t; //! only int16_t
+
+ #define DCTSIZE 8 // number of entries between consecutive rows of a block
+ #define DCTSIZE_S "8" // DCTSIZE as a string literal, pasted into the inline-asm operand offsets
+ // Fixed-point helpers. NOTE(review): the arguments s, k, t and n are not parenthesized in the expansions below, so only simple expressions are safe there.
+ #define FIX(x,s) ((int) ((x) * (1<<s) + 0.5)&0xffff) // x scaled by 2^s, 0.5 added, converted to int, low 16 bits kept
+ #define C64(x) ((uint64_t)((x)|(x)<<16))<<32 | (uint64_t)(x) | (uint64_t)(x)<<16 // x repeated in all four 16-bit fields of a uint64_t; expansion is not wrapped in parentheses, and (x)<<16 is evaluated in int
+ #define FIX64(x,s) C64(FIX(x,s)) // FIX() value repeated in four 16-bit fields
+ // Arithmetic helpers shared by the scalar transforms.
+ #define MULTIPLY16H(x,k) (((x)*(k))>>16) // product shifted right by 16
+ #define THRESHOLD(r,x,t) if(((unsigned)((x)+t))>t*2) r=(x);else r=0; // r = x when (unsigned)(x+t) > t*2, else 0; expands to a bare if/else statement
+ #define DESCALE(x,n) (((x) + (1 << ((n)-1))) >> n) // adds 2^(n-1), then shifts right by n
+
+ #if HAVE_MMX
+ // MMX build: every constant is a 64-bit word holding the same 16-bit FIX() value four times (see FIX64/C64).
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_382683433)=FIX64(0.382683433, 14);
+ DECLARE_ALIGNED(8, uint64_t, ff_MM_FIX_0_541196100)=FIX64(0.541196100, 14);
+ DECLARE_ALIGNED(8, uint64_t, ff_MM_FIX_0_707106781)=FIX64(0.707106781, 14);
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_306562965)=FIX64(1.306562965, 14);
+ // same value as MM_FIX_1_414213562 below, but scaled by 2^14 instead of 2^13
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_414213562_A)=FIX64(1.414213562, 14);
+ // constants scaled by 2^13
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_847759065)=FIX64(1.847759065, 13);
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_2_613125930)=FIX64(-2.613125930, 13); //- (the value is stored negated)
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_414213562)=FIX64(1.414213562, 13);
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_082392200)=FIX64(1.082392200, 13);
+ //for t3,t5,t7 == 0 shortcut
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_847759065)=FIX64(0.847759065, 14);
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_566454497)=FIX64(0.566454497, 14);
+ DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_198912367)=FIX64(0.198912367, 14);
+ // plain integers (4 and 2) repeated in all four 16-bit fields
+ DECLARE_ASM_CONST(8, uint64_t, MM_DESCALE_RND)=C64(4);
+ DECLARE_ASM_CONST(8, uint64_t, MM_2)=C64(2);
+
+ #else /* !HAVE_MMX */
+ // Scalar build: the same FIX() values as single int16_t constants; intermediates use 32-bit ints.
+ typedef int32_t int_simd16_t;
+ static const int16_t FIX_0_382683433=FIX(0.382683433, 14);
+ static const int16_t FIX_0_541196100=FIX(0.541196100, 14);
+ static const int16_t FIX_0_707106781=FIX(0.707106781, 14);
+ static const int16_t FIX_1_306562965=FIX(1.306562965, 14);
+ static const int16_t FIX_1_414213562_A=FIX(1.414213562, 14);
+ static const int16_t FIX_1_847759065=FIX(1.847759065, 13);
+ static const int16_t FIX_2_613125930=FIX(-2.613125930, 13); //- (the value is stored negated; FIX() masks to 16 bits before the int16_t conversion)
+ static const int16_t FIX_1_414213562=FIX(1.414213562, 13);
+ static const int16_t FIX_1_082392200=FIX(1.082392200, 13);
+
+ #endif
+
+#if !HAVE_MMX
+ /*
+  * Scalar (non-MMX) column pass: forward DCT, coefficient thresholding and
+  * inverse DCT of each column in one go.
+  *
+  * thr_adr  threshold table, read as threshold[k*8 + column] for the
+  *          coefficient index k = 0..7
+  * data     input; one column is the 8 values data[DCTSIZE*0 .. DCTSIZE*7]
+  * output   result buffer; rows 0..5 of each column are added to, rows 6
+  *          and 7 are overwritten
+  * cnt      loop counter, reduced by 2 for every group of DCTSIZE columns
+  *
+  * After each group of DCTSIZE columns both pointers skip 8 more entries.
+  */
+ static void column_fidct_c(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt)
+ {
+ int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int_simd16_t tmp10, tmp11, tmp12, tmp13;
+ int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
+ int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7; // forward-DCT coefficients before thresholding
+
+ int16_t* dataptr;
+ int16_t* wsptr;
+ int16_t *threshold;
+ int ctr;
+
+ dataptr = data;
+ wsptr = output;
+
+ for (; cnt > 0; cnt-=2) { //start positions
+ threshold=(int16_t*)thr_adr;//threshold_mtx
+ for (ctr = DCTSIZE; ctr > 0; ctr--) {
+ // Process columns from input, add to output.
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+ tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+ tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+ tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+ // Even part of FDCT
+
+ tmp10 = tmp0 + tmp3;
+ tmp13 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp1 - tmp2;
+
+ d0 = tmp10 + tmp11;
+ d4 = tmp10 - tmp11;
+
+ z1 = MULTIPLY16H((tmp12 + tmp13) <<2, FIX_0_707106781);
+ d2 = tmp13 + z1;
+ d6 = tmp13 - z1;
+
+ // Even part of IDCT
+ // (coefficients whose magnitude does not exceed the threshold become 0)
+ THRESHOLD(tmp0, d0, threshold[0*8]);
+ THRESHOLD(tmp1, d2, threshold[2*8]);
+ THRESHOLD(tmp2, d4, threshold[4*8]);
+ THRESHOLD(tmp3, d6, threshold[6*8]);
+ tmp0+=2; // offset added ahead of the >>2 below
+ tmp10 = (tmp0 + tmp2)>>2;
+ tmp11 = (tmp0 - tmp2)>>2;
+
+ tmp13 = (tmp1 + tmp3)>>2; //+2 ! (psnr decides)
+ tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
+
+ tmp0 = tmp10 + tmp13; //->temps
+ tmp3 = tmp10 - tmp13; //->temps
+ tmp1 = tmp11 + tmp12; //->temps
+ tmp2 = tmp11 - tmp12; //->temps
+
+ // Odd part of FDCT
+
+ tmp10 = tmp4 + tmp5;
+ tmp11 = tmp5 + tmp6;
+ tmp12 = tmp6 + tmp7;
+
+ z5 = MULTIPLY16H((tmp10 - tmp12)<<2, FIX_0_382683433);
+ z2 = MULTIPLY16H(tmp10 <<2, FIX_0_541196100) + z5;
+ z4 = MULTIPLY16H(tmp12 <<2, FIX_1_306562965) + z5;
+ z3 = MULTIPLY16H(tmp11 <<2, FIX_0_707106781);
+
+ z11 = tmp7 + z3;
+ z13 = tmp7 - z3;
+
+ d5 = z13 + z2;
+ d3 = z13 - z2;
+ d1 = z11 + z4;
+ d7 = z11 - z4;
+
+ // Odd part of IDCT
+
+ THRESHOLD(tmp4, d1, threshold[1*8]);
+ THRESHOLD(tmp5, d3, threshold[3*8]);
+ THRESHOLD(tmp6, d5, threshold[5*8]);
+ THRESHOLD(tmp7, d7, threshold[7*8]);
+
+ //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
+ z13 = tmp6 + tmp5;
+ z10 = (tmp6 - tmp5)<<1;
+ z11 = tmp4 + tmp7;
+ z12 = (tmp4 - tmp7)<<1;
+
+ tmp7 = (z11 + z13)>>2; //+2 !
+ tmp11 = MULTIPLY16H((z11 - z13)<<1, FIX_1_414213562);
+ z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
+ tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
+ tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !! (the constant itself is stored negated)
+
+ tmp6 = tmp12 - tmp7;
+ tmp5 = tmp11 - tmp6;
+ tmp4 = tmp10 + tmp5;
+
+ wsptr[DCTSIZE*0]+= (tmp0 + tmp7);
+ wsptr[DCTSIZE*1]+= (tmp1 + tmp6);
+ wsptr[DCTSIZE*2]+= (tmp2 + tmp5);
+ wsptr[DCTSIZE*3]+= (tmp3 - tmp4);
+ wsptr[DCTSIZE*4]+= (tmp3 + tmp4);
+ wsptr[DCTSIZE*5]+= (tmp2 - tmp5);
+ wsptr[DCTSIZE*6]= (tmp1 - tmp6); // rows 6 and 7 are stored, not accumulated
+ wsptr[DCTSIZE*7]= (tmp0 - tmp7);
+ //
+ dataptr++; //next column
+ wsptr++;
+ threshold++;
+ }
+ dataptr+=8; //skip each second start pos
+ wsptr +=8;
+ }
+ }
+
+#else /* HAVE_MMX */
+
+static void column_fidct_mmx(int16_t* thr_adr, int16_t *data, int16_t *output, int cnt)
+{
+ DECLARE_ALIGNED(8, uint64_t, temps)[4];
+ __asm__ volatile(
+ ASMALIGN(4)
+ "1: \n\t"
+ "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t"
+ //
+ "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t"
+ "movq %%mm1, %%mm0 \n\t"
+
+ "paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0
+ "movq %%mm7, %%mm3 \n\t"
+
+ "paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3
+ "movq %%mm1, %%mm5 \n\t"
+
+ "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t"
+ "psubw %%mm7, %%mm1 \n\t" //t13
+
+ "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
+ "movq %%mm6, %%mm4 \n\t"
+
+ "paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1
+ "paddw %%mm7, %%mm5 \n\t" //t10
+
+ "paddw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2
+ "movq %%mm6, %%mm7 \n\t"
+
+ "paddw %%mm2, %%mm6 \n\t" //t11
+ "psubw %%mm2, %%mm7 \n\t" //t12
+
+ "movq %%mm5, %%mm2 \n\t"
+ "paddw %%mm6, %%mm5 \n\t" //d0
+ // i0 t13 t12 i3 i1 d0 - d4
+ "psubw %%mm6, %%mm2 \n\t" //d4
+ "paddw %%mm1, %%mm7 \n\t"
+
+ "movq 4*16(%%"REG_d"), %%mm6 \n\t"
+ "psllw $2, %%mm7 \n\t"
+
+ "psubw 0*16(%%"REG_d"), %%mm5 \n\t"
+ "psubw %%mm6, %%mm2 \n\t"
+
+ "paddusw 0*16(%%"REG_d"), %%mm5 \n\t"
+ "paddusw %%mm6, %%mm2 \n\t"
+
+ "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm7 \n\t"
+ //
+ "paddw 0*16(%%"REG_d"), %%mm5 \n\t"
+ "paddw %%mm6, %%mm2 \n\t"
+
+ "psubusw 0*16(%%"REG_d"), %%mm5 \n\t"
+ "psubusw %%mm6, %%mm2 \n\t"
+
+//This func is totally compute-bound, operates at huge speed. So, DC shortcut
+// at this place isn't worthwhile due to BTB miss penalty (checked on Pent. 3).
+//However, typical numbers: nondc - 29%%, dc - 46%%, zero - 25%%. All <> 0 case is very rare.
+ "paddw "MANGLE(MM_2)", %%mm5 \n\t"
+ "movq %%mm2, %%mm6 \n\t"
+
+ "paddw %%mm5, %%mm2 \n\t"
+ "psubw %%mm6, %%mm5 \n\t"
+
+ "movq %%mm1, %%mm6 \n\t"
+ "paddw %%mm7, %%mm1 \n\t" //d2
+
+ "psubw 2*16(%%"REG_d"), %%mm1 \n\t"
+ "psubw %%mm7, %%mm6 \n\t" //d6
+
+ "movq 6*16(%%"REG_d"), %%mm7 \n\t"
+ "psraw $2, %%mm5 \n\t"
+
+ "paddusw 2*16(%%"REG_d"), %%mm1 \n\t"
+ "psubw %%mm7, %%mm6 \n\t"
+ // t7 d2 /t11 t4 t6 - d6 /t10
+
+ "paddw 2*16(%%"REG_d"), %%mm1 \n\t"
+ "paddusw %%mm7, %%mm6 \n\t"
+
+ "psubusw 2*16(%%"REG_d"), %%mm1 \n\t"
+ "paddw %%mm7, %%mm6 \n\t"
+
+ "psubw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm3 \n\t"
+ "psubusw %%mm7, %%mm6 \n\t"
+
+ //movq [edi+"DCTSIZE_S"*2*2], mm1
+ //movq [edi+"DCTSIZE_S"*6*2], mm6
+ "movq %%mm1, %%mm7 \n\t"
+ "psraw $2, %%mm2 \n\t"
+
+ "psubw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm4 \n\t"
+ "psubw %%mm6, %%mm1 \n\t"
+
+ "psubw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm0 \n\t"
+ "paddw %%mm7, %%mm6 \n\t" //'t13
+
+ "psraw $2, %%mm6 \n\t" //paddw mm6, MM_2 !! ---
+ "movq %%mm2, %%mm7 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t"
+ "paddw %%mm6, %%mm2 \n\t" //'t0
+
+ "movq %%mm2, 0*8+%3 \n\t" //!
+ "psubw %%mm6, %%mm7 \n\t" //'t3
+
+ "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
+ "psubw %%mm6, %%mm1 \n\t" //'t12
+
+ "psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5
+ "movq %%mm5, %%mm6 \n\t"
+
+ "movq %%mm7, 3*8+%3 \n\t"
+ "paddw %%mm2, %%mm3 \n\t" //t10
+
+ "paddw %%mm4, %%mm2 \n\t" //t11
+ "paddw %%mm0, %%mm4 \n\t" //t12
+
+ "movq %%mm3, %%mm7 \n\t"
+ "psubw %%mm4, %%mm3 \n\t"
+
+ "psllw $2, %%mm3 \n\t"
+ "psllw $2, %%mm7 \n\t" //opt for P6
+
+ "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t"
+ "psllw $2, %%mm4 \n\t"
+
+ "pmulhw "MANGLE(ff_MM_FIX_0_541196100)", %%mm7 \n\t"
+ "psllw $2, %%mm2 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm4 \n\t"
+ "paddw %%mm1, %%mm5 \n\t" //'t1
+
+ "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm2 \n\t"
+ "psubw %%mm1, %%mm6 \n\t" //'t2
+ // t7 't12 't11 t4 t6 - 't13 't10 ---
+
+ "paddw %%mm3, %%mm7 \n\t" //z2
+
+ "movq %%mm5, 1*8+%3 \n\t"
+ "paddw %%mm3, %%mm4 \n\t" //z4
+
+ "movq 3*16(%%"REG_d"), %%mm3 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+
+ "movq %%mm6, 2*8+%3 \n\t"
+ "psubw %%mm2, %%mm1 \n\t" //z13
+
+//===
+ "paddw %%mm2, %%mm0 \n\t" //z11
+ "movq %%mm1, %%mm5 \n\t"
+
+ "movq 5*16(%%"REG_d"), %%mm2 \n\t"
+ "psubw %%mm7, %%mm1 \n\t" //d3
+
+ "paddw %%mm7, %%mm5 \n\t" //d5
+ "psubw %%mm3, %%mm1 \n\t"
+
+ "movq 1*16(%%"REG_d"), %%mm7 \n\t"
+ "psubw %%mm2, %%mm5 \n\t"
+
+ "movq %%mm0, %%mm6 \n\t"
+ "paddw %%mm4, %%mm0 \n\t" //d1
+
+ "paddusw %%mm3, %%mm1 \n\t"
+ "psubw %%mm4, %%mm6 \n\t" //d7
+
+ // d1 d3 - - - d5 d7 -
+ "movq 7*16(%%"REG_d"), %%mm4 \n\t"
+ "psubw %%mm7, %%mm0 \n\t"
+
+ "psubw %%mm4, %%mm6 \n\t"
+ "paddusw %%mm2, %%mm5 \n\t"
+
+ "paddusw %%mm4, %%mm6 \n\t"
+ "paddw %%mm3, %%mm1 \n\t"
+
+ "paddw %%mm2, %%mm5 \n\t"
+ "paddw %%mm4, %%mm6 \n\t"
+
+ "psubusw %%mm3, %%mm1 \n\t"
+ "psubusw %%mm2, %%mm5 \n\t"
+
+ "psubusw %%mm4, %%mm6 \n\t"
+ "movq %%mm1, %%mm4 \n\t"
+
+ "por %%mm5, %%mm4 \n\t"
+ "paddusw %%mm7, %%mm0 \n\t"
+
+ "por %%mm6, %%mm4 \n\t"
+ "paddw %%mm7, %%mm0 \n\t"
+
+ "packssdw %%mm4, %%mm4 \n\t"
+ "psubusw %%mm7, %%mm0 \n\t"
+
+ "movd %%mm4, %%"REG_a" \n\t"
+ "or %%"REG_a", %%"REG_a" \n\t"
+ "jnz 2f \n\t"
+ //movq [edi+"DCTSIZE_S"*3*2], mm1
+ //movq [edi+"DCTSIZE_S"*5*2], mm5
+ //movq [edi+"DCTSIZE_S"*1*2], mm0
+ //movq [edi+"DCTSIZE_S"*7*2], mm6
+ // t4 t5 - - - t6 t7 -
+ //--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0
+//Typical numbers: nondc - 19%%, dc - 26%%, zero - 55%%. zero case alone isn't worthwhile
+ "movq 0*8+%3, %%mm4 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6
+ "movq %%mm1, %%mm2 \n\t"
+
+ "movq "DCTSIZE_S"*0*2(%%"REG_D"), %%mm5 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5
+ "paddw %%mm4, %%mm5 \n\t"
+
+ "movq 1*8+%3, %%mm6 \n\t"
+ //paddw mm3, MM_2
+ "psraw $2, %%mm3 \n\t" //tmp7
+
+ "pmulhw "MANGLE(MM_FIX_0_198912367)", %%mm2 \n\t" //-tmp4
+ "psubw %%mm3, %%mm4 \n\t"
+
+ "movq "DCTSIZE_S"*1*2(%%"REG_D"), %%mm7 \n\t"
+ "paddw %%mm3, %%mm5 \n\t"
+
+ "movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
+ "paddw %%mm6, %%mm7 \n\t"
+
+ "movq 2*8+%3, %%mm3 \n\t"
+ "psubw %%mm0, %%mm6 \n\t"
+
+ "movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t"
+ "paddw %%mm0, %%mm7 \n\t"
+
+ "movq %%mm5, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+
+ "movq %%mm6, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"
+ "psubw %%mm1, %%mm3 \n\t"
+
+ "movq "DCTSIZE_S"*5*2(%%"REG_D"), %%mm5 \n\t"
+ "paddw %%mm1, %%mm4 \n\t"
+
+ "movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t"
+ "paddw %%mm3, %%mm5 \n\t"
+
+ "movq 3*8+%3, %%mm0 \n\t"
+ "add $8, %%"REG_S" \n\t"
+
+ "movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+
+ "movq %%mm4, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
+ "psubw %%mm2, %%mm0 \n\t"
+
+ "movq "DCTSIZE_S"*4*2(%%"REG_D"), %%mm7 \n\t"
+ "paddw %%mm2, %%mm6 \n\t"
+
+ "movq %%mm5, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"
+ "paddw %%mm0, %%mm7 \n\t"
+
+ "movq %%mm6, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
+
+ "movq %%mm7, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"
+ "add $8, %%"REG_D" \n\t"
+ "jmp 4f \n\t"
+
+ "2: \n\t"
+ //--- non DC2
+ //psraw mm1, 2 w/o it -> offset. thr1, thr1, thr1 (actually thr1, thr1, thr1-1)
+ //psraw mm5, 2
+ //psraw mm0, 2
+ //psraw mm6, 2
+ "movq %%mm5, %%mm3 \n\t"
+ "psubw %%mm1, %%mm5 \n\t"
+
+ "psllw $1, %%mm5 \n\t" //'z10
+ "paddw %%mm1, %%mm3 \n\t" //'z13
+
+ "movq %%mm0, %%mm2 \n\t"
+ "psubw %%mm6, %%mm0 \n\t"
+
+ "movq %%mm5, %%mm1 \n\t"
+ "psllw $1, %%mm0 \n\t" //'z12
+
+ "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm1 \n\t" //-
+ "paddw %%mm0, %%mm5 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm5 \n\t" //'z5
+ "paddw %%mm6, %%mm2 \n\t" //'z11
+
+ "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm0 \n\t"
+ "movq %%mm2, %%mm7 \n\t"
+
+ //---
+ "movq 0*8+%3, %%mm4 \n\t"
+ "psubw %%mm3, %%mm2 \n\t"
+
+ "psllw $1, %%mm2 \n\t"
+ "paddw %%mm3, %%mm7 \n\t" //'t7
+
+ "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //'t11
+ "movq %%mm4, %%mm6 \n\t"
+ //paddw mm7, MM_2
+ "psraw $2, %%mm7 \n\t"
+
+ "paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t"
+ "psubw %%mm7, %%mm6 \n\t"
+
+ "movq 1*8+%3, %%mm3 \n\t"
+ "paddw %%mm7, %%mm4 \n\t"
+
+ "movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
+ "paddw %%mm5, %%mm1 \n\t" //'t12
+
+ "movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
+ "psubw %%mm7, %%mm1 \n\t" //'t6
+
+ "movq 2*8+%3, %%mm7 \n\t"
+ "psubw %%mm5, %%mm0 \n\t" //'t10
+
+ "movq 3*8+%3, %%mm6 \n\t"
+ "movq %%mm3, %%mm5 \n\t"
+
+ "paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t"
+ "psubw %%mm1, %%mm5 \n\t"
+
+ "psubw %%mm1, %%mm2 \n\t" //'t5
+ "paddw %%mm1, %%mm3 \n\t"
+
+ "movq %%mm5, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"
+ "movq %%mm7, %%mm4 \n\t"
+
+ "paddw "DCTSIZE_S"*2*2(%%"REG_D"), %%mm7 \n\t"
+ "psubw %%mm2, %%mm4 \n\t"
+
+ "paddw "DCTSIZE_S"*5*2(%%"REG_D"), %%mm4 \n\t"
+ "paddw %%mm2, %%mm7 \n\t"
+
+ "movq %%mm3, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
+ "paddw %%mm2, %%mm0 \n\t" //'t4
+
+ // 't4 't6 't5 - - - - 't7
+ "movq %%mm7, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
+ "movq %%mm6, %%mm1 \n\t"
+
+ "paddw "DCTSIZE_S"*4*2(%%"REG_D"), %%mm6 \n\t"
+ "psubw %%mm0, %%mm1 \n\t"
+
+ "paddw "DCTSIZE_S"*3*2(%%"REG_D"), %%mm1 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+
+ "movq %%mm4, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"
+ "add $8, %%"REG_S" \n\t"
+
+ "movq %%mm6, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"
+
+ "movq %%mm1, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
+ "add $8, %%"REG_D" \n\t"
+
+ "4: \n\t"
+//=part 2 (the same)===========================================================
+ "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t"
+ //
+ "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t"
+ "movq %%mm1, %%mm0 \n\t"
+
+ "paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0
+ "movq %%mm7, %%mm3 \n\t"
+
+ "paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3
+ "movq %%mm1, %%mm5 \n\t"
+
+ "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t"
+ "psubw %%mm7, %%mm1 \n\t" //t13
+
+ "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
+ "movq %%mm6, %%mm4 \n\t"
+
+ "paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1
+ "paddw %%mm7, %%mm5 \n\t" //t10
+
+ "paddw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2
+ "movq %%mm6, %%mm7 \n\t"
+
+ "paddw %%mm2, %%mm6 \n\t" //t11
+ "psubw %%mm2, %%mm7 \n\t" //t12
+
+ "movq %%mm5, %%mm2 \n\t"
+ "paddw %%mm6, %%mm5 \n\t" //d0
+ // i0 t13 t12 i3 i1 d0 - d4
+ "psubw %%mm6, %%mm2 \n\t" //d4
+ "paddw %%mm1, %%mm7 \n\t"
+
+ "movq 1*8+4*16(%%"REG_d"), %%mm6 \n\t"
+ "psllw $2, %%mm7 \n\t"
+
+ "psubw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"
+ "psubw %%mm6, %%mm2 \n\t"
+
+ "paddusw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"
+ "paddusw %%mm6, %%mm2 \n\t"
+
+ "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm7 \n\t"
+ //
+ "paddw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"
+ "paddw %%mm6, %%mm2 \n\t"
+
+ "psubusw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"
+ "psubusw %%mm6, %%mm2 \n\t"
+
+//This func is totally compute-bound, operates at huge speed. So, DC shortcut
+// at this place isn't worthwhile due to BTB miss penalty (checked on Pent. 3).
+//However, typical numbers: nondc - 29%%, dc - 46%%, zero - 25%%. All <> 0 case is very rare.
+ "paddw "MANGLE(MM_2)", %%mm5 \n\t"
+ "movq %%mm2, %%mm6 \n\t"
+
+ "paddw %%mm5, %%mm2 \n\t"
+ "psubw %%mm6, %%mm5 \n\t"
+
+ "movq %%mm1, %%mm6 \n\t"
+ "paddw %%mm7, %%mm1 \n\t" //d2
+
+ "psubw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"
+ "psubw %%mm7, %%mm6 \n\t" //d6
+
+ "movq 1*8+6*16(%%"REG_d"), %%mm7 \n\t"
+ "psraw $2, %%mm5 \n\t"
+
+ "paddusw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"
+ "psubw %%mm7, %%mm6 \n\t"
+ // t7 d2 /t11 t4 t6 - d6 /t10
+
+ "paddw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"
+ "paddusw %%mm7, %%mm6 \n\t"
+
+ "psubusw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"
+ "paddw %%mm7, %%mm6 \n\t"
+
+ "psubw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm3 \n\t"
+ "psubusw %%mm7, %%mm6 \n\t"
+
+ //movq [edi+"DCTSIZE_S"*2*2], mm1
+ //movq [edi+"DCTSIZE_S"*6*2], mm6
+ "movq %%mm1, %%mm7 \n\t"
+ "psraw $2, %%mm2 \n\t"
+
+ "psubw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm4 \n\t"
+ "psubw %%mm6, %%mm1 \n\t"
+
+ "psubw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm0 \n\t"
+ "paddw %%mm7, %%mm6 \n\t" //'t13
+
+ "psraw $2, %%mm6 \n\t" //paddw mm6, MM_2 !! ---
+ "movq %%mm2, %%mm7 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t"
+ "paddw %%mm6, %%mm2 \n\t" //'t0
+
+ "movq %%mm2, 0*8+%3 \n\t" //!
+ "psubw %%mm6, %%mm7 \n\t" //'t3
+
+ "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
+ "psubw %%mm6, %%mm1 \n\t" //'t12
+
+ "psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5
+ "movq %%mm5, %%mm6 \n\t"
+
+ "movq %%mm7, 3*8+%3 \n\t"
+ "paddw %%mm2, %%mm3 \n\t" //t10
+
+ "paddw %%mm4, %%mm2 \n\t" //t11
+ "paddw %%mm0, %%mm4 \n\t" //t12
+
+ "movq %%mm3, %%mm7 \n\t"
+ "psubw %%mm4, %%mm3 \n\t"
+
+ "psllw $2, %%mm3 \n\t"
+ "psllw $2, %%mm7 \n\t" //opt for P6
+
+ "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t"
+ "psllw $2, %%mm4 \n\t"
+
+ "pmulhw "MANGLE(ff_MM_FIX_0_541196100)", %%mm7 \n\t"
+ "psllw $2, %%mm2 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm4 \n\t"
+ "paddw %%mm1, %%mm5 \n\t" //'t1
+
+ "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm2 \n\t"
+ "psubw %%mm1, %%mm6 \n\t" //'t2
+ // t7 't12 't11 t4 t6 - 't13 't10 ---
+
+ "paddw %%mm3, %%mm7 \n\t" //z2
+
+ "movq %%mm5, 1*8+%3 \n\t"
+ "paddw %%mm3, %%mm4 \n\t" //z4
+
+ "movq 1*8+3*16(%%"REG_d"), %%mm3 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+
+ "movq %%mm6, 2*8+%3 \n\t"
+ "psubw %%mm2, %%mm1 \n\t" //z13
+
+//===
+ "paddw %%mm2, %%mm0 \n\t" //z11
+ "movq %%mm1, %%mm5 \n\t"
+
+ "movq 1*8+5*16(%%"REG_d"), %%mm2 \n\t"
+ "psubw %%mm7, %%mm1 \n\t" //d3
+
+ "paddw %%mm7, %%mm5 \n\t" //d5
+ "psubw %%mm3, %%mm1 \n\t"
+
+ "movq 1*8+1*16(%%"REG_d"), %%mm7 \n\t"
+ "psubw %%mm2, %%mm5 \n\t"
+
+ "movq %%mm0, %%mm6 \n\t"
+ "paddw %%mm4, %%mm0 \n\t" //d1
+
+ "paddusw %%mm3, %%mm1 \n\t"
+ "psubw %%mm4, %%mm6 \n\t" //d7
+
+ // d1 d3 - - - d5 d7 -
+ "movq 1*8+7*16(%%"REG_d"), %%mm4 \n\t"
+ "psubw %%mm7, %%mm0 \n\t"
+
+ "psubw %%mm4, %%mm6 \n\t"
+ "paddusw %%mm2, %%mm5 \n\t"
+
+ "paddusw %%mm4, %%mm6 \n\t"
+ "paddw %%mm3, %%mm1 \n\t"
+
+ "paddw %%mm2, %%mm5 \n\t"
+ "paddw %%mm4, %%mm6 \n\t"
+
+ "psubusw %%mm3, %%mm1 \n\t"
+ "psubusw %%mm2, %%mm5 \n\t"
+
+ "psubusw %%mm4, %%mm6 \n\t"
+ "movq %%mm1, %%mm4 \n\t"
+
+ "por %%mm5, %%mm4 \n\t"
+ "paddusw %%mm7, %%mm0 \n\t"
+
+ "por %%mm6, %%mm4 \n\t"
+ "paddw %%mm7, %%mm0 \n\t"
+
+ "packssdw %%mm4, %%mm4 \n\t"
+ "psubusw %%mm7, %%mm0 \n\t"
+
+ "movd %%mm4, %%"REG_a" \n\t"
+ "or %%"REG_a", %%"REG_a" \n\t"
+ "jnz 3f \n\t"
+ //movq [edi+"DCTSIZE_S"*3*2], mm1
+ //movq [edi+"DCTSIZE_S"*5*2], mm5
+ //movq [edi+"DCTSIZE_S"*1*2], mm0
+ //movq [edi+"DCTSIZE_S"*7*2], mm6
+ // t4 t5 - - - t6 t7 -
+ //--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0
+//Typical numbers: nondc - 19%%, dc - 26%%, zero - 55%%. zero case alone isn't worthwhile
+ "movq 0*8+%3, %%mm4 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6
+ "movq %%mm1, %%mm2 \n\t"
+
+ "movq "DCTSIZE_S"*0*2(%%"REG_D"), %%mm5 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5
+ "paddw %%mm4, %%mm5 \n\t"
+
+ "movq 1*8+%3, %%mm6 \n\t"
+ //paddw mm3, MM_2
+ "psraw $2, %%mm3 \n\t" //tmp7
+
+ "pmulhw "MANGLE(MM_FIX_0_198912367)", %%mm2 \n\t" //-tmp4
+ "psubw %%mm3, %%mm4 \n\t"
+
+ "movq "DCTSIZE_S"*1*2(%%"REG_D"), %%mm7 \n\t"
+ "paddw %%mm3, %%mm5 \n\t"
+
+ "movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
+ "paddw %%mm6, %%mm7 \n\t"
+
+ "movq 2*8+%3, %%mm3 \n\t"
+ "psubw %%mm0, %%mm6 \n\t"
+
+ "movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t"
+ "paddw %%mm0, %%mm7 \n\t"
+
+ "movq %%mm5, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+
+ "movq %%mm6, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"
+ "psubw %%mm1, %%mm3 \n\t"
+
+ "movq "DCTSIZE_S"*5*2(%%"REG_D"), %%mm5 \n\t"
+ "paddw %%mm1, %%mm4 \n\t"
+
+ "movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t"
+ "paddw %%mm3, %%mm5 \n\t"
+
+ "movq 3*8+%3, %%mm0 \n\t"
+ "add $24, %%"REG_S" \n\t"
+
+ "movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+
+ "movq %%mm4, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
+ "psubw %%mm2, %%mm0 \n\t"
+
+ "movq "DCTSIZE_S"*4*2(%%"REG_D"), %%mm7 \n\t"
+ "paddw %%mm2, %%mm6 \n\t"
+
+ "movq %%mm5, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"
+ "paddw %%mm0, %%mm7 \n\t"
+
+ "movq %%mm6, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
+
+ "movq %%mm7, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"
+ "add $24, %%"REG_D" \n\t"
+ "sub $2, %%"REG_c" \n\t"
+ "jnz 1b \n\t"
+ "jmp 5f \n\t"
+
+ "3: \n\t"
+ //--- non DC2
+ //psraw mm1, 2 w/o it -> offset. thr1, thr1, thr1 (actually thr1, thr1, thr1-1)
+ //psraw mm5, 2
+ //psraw mm0, 2
+ //psraw mm6, 2
+ "movq %%mm5, %%mm3 \n\t"
+ "psubw %%mm1, %%mm5 \n\t"
+
+ "psllw $1, %%mm5 \n\t" //'z10
+ "paddw %%mm1, %%mm3 \n\t" //'z13
+
+ "movq %%mm0, %%mm2 \n\t"
+ "psubw %%mm6, %%mm0 \n\t"
+
+ "movq %%mm5, %%mm1 \n\t"
+ "psllw $1, %%mm0 \n\t" //'z12
+
+ "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm1 \n\t" //-
+ "paddw %%mm0, %%mm5 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm5 \n\t" //'z5
+ "paddw %%mm6, %%mm2 \n\t" //'z11
+
+ "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm0 \n\t"
+ "movq %%mm2, %%mm7 \n\t"
+
+ //---
+ "movq 0*8+%3, %%mm4 \n\t"
+ "psubw %%mm3, %%mm2 \n\t"
+
+ "psllw $1, %%mm2 \n\t"
+ "paddw %%mm3, %%mm7 \n\t" //'t7
+
+ "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //'t11
+ "movq %%mm4, %%mm6 \n\t"
+ //paddw mm7, MM_2
+ "psraw $2, %%mm7 \n\t"
+
+ "paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t"
+ "psubw %%mm7, %%mm6 \n\t"
+
+ "movq 1*8+%3, %%mm3 \n\t"
+ "paddw %%mm7, %%mm4 \n\t"
+
+ "movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
+ "paddw %%mm5, %%mm1 \n\t" //'t12
+
+ "movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
+ "psubw %%mm7, %%mm1 \n\t" //'t6
+
+ "movq 2*8+%3, %%mm7 \n\t"
+ "psubw %%mm5, %%mm0 \n\t" //'t10
+
+ "movq 3*8+%3, %%mm6 \n\t"
+ "movq %%mm3, %%mm5 \n\t"
+
+ "paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t"
+ "psubw %%mm1, %%mm5 \n\t"
+
+ "psubw %%mm1, %%mm2 \n\t" //'t5
+ "paddw %%mm1, %%mm3 \n\t"
+
+ "movq %%mm5, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"
+ "movq %%mm7, %%mm4 \n\t"
+
+ "paddw "DCTSIZE_S"*2*2(%%"REG_D"), %%mm7 \n\t"
+ "psubw %%mm2, %%mm4 \n\t"
+
+ "paddw "DCTSIZE_S"*5*2(%%"REG_D"), %%mm4 \n\t"
+ "paddw %%mm2, %%mm7 \n\t"
+
+ "movq %%mm3, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
+ "paddw %%mm2, %%mm0 \n\t" //'t4
+
+ // 't4 't6 't5 - - - - 't7
+ "movq %%mm7, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
+ "movq %%mm6, %%mm1 \n\t"
+
+ "paddw "DCTSIZE_S"*4*2(%%"REG_D"), %%mm6 \n\t"
+ "psubw %%mm0, %%mm1 \n\t"
+
+ "paddw "DCTSIZE_S"*3*2(%%"REG_D"), %%mm1 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+
+ "movq %%mm4, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"
+ "add $24, %%"REG_S" \n\t"
+
+ "movq %%mm6, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"
+
+ "movq %%mm1, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
+ "add $24, %%"REG_D" \n\t"
+ "sub $2, %%"REG_c" \n\t"
+ "jnz 1b \n\t"
+ "5: \n\t"
+
+ : "+S"(data), "+D"(output), "+c"(cnt), "=o"(temps)
+ : "d"(thr_adr)
+ NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,MM_2,MM_FIX_1_414213562_A,MM_FIX_1_414213562,MM_FIX_0_382683433,
+ ff_MM_FIX_0_541196100,MM_FIX_1_306562965,MM_FIX_0_847759065)
+ NAMED_CONSTRAINTS_ADD(MM_FIX_0_566454497,MM_FIX_0_198912367,MM_FIX_2_613125930,MM_FIX_1_847759065,
+ MM_FIX_1_082392200)
+ : "%"REG_a
+ );
+}
+
+#endif // HAVE_MMX
+
+#if !HAVE_MMX
+ /*
+  * Scalar (non-MMX) inverse transform of workspace rows.
+  *
+  * workspace      input; each iteration reads the 8 values wsptr[0..7] and
+  *                then advances by DCTSIZE
+  * output_adr     destination; each iteration adds 8 descaled results to
+  *                outptr[0 .. 7*output_stride] and then advances by one
+  * output_stride  distance between the 8 destination entries of an iteration
+  * cnt            number of iterations divided by 4 (the loop runs cnt*4
+  *                times)
+  */
+ static void row_idct_c(int16_t* workspace,
+ int16_t* output_adr, int output_stride, int cnt)
+ {
+ int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int_simd16_t tmp10, tmp11, tmp12, tmp13;
+ int_simd16_t z5, z10, z11, z12, z13;
+ int16_t* outptr;
+ int16_t* wsptr;
+
+ cnt*=4;
+ wsptr = workspace;
+ outptr = output_adr;
+ for (; cnt > 0; cnt--) {
+ // Even part
+ //Simd version reads 4x4 block and transposes it
+ tmp10 = ( wsptr[2] + wsptr[3]);
+ tmp11 = ( wsptr[2] - wsptr[3]);
+
+ tmp13 = ( wsptr[0] + wsptr[1]);
+ tmp12 = (MULTIPLY16H( wsptr[0] - wsptr[1], FIX_1_414213562_A)<<2) - tmp13;//this shift order to avoid overflow
+
+ tmp0 = tmp10 + tmp13; //->temps
+ tmp3 = tmp10 - tmp13; //->temps
+ tmp1 = tmp11 + tmp12;
+ tmp2 = tmp11 - tmp12;
+
+ // Odd part
+ //Also transpose, with previous:
+ // ---- ---- ||||
+ // ---- ---- idct ||||
+ // ---- ---- ---> ||||
+ // ---- ---- ||||
+ z13 = wsptr[4] + wsptr[5];
+ z10 = wsptr[4] - wsptr[5];
+ z11 = wsptr[6] + wsptr[7];
+ z12 = wsptr[6] - wsptr[7];
+
+ tmp7 = z11 + z13;
+ tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
+
+ z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
+ tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
+ tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_ (the constant itself is stored negated)
+
+ tmp6 = (tmp12<<3) - tmp7;
+ tmp5 = (tmp11<<3) - tmp6;
+ tmp4 = (tmp10<<3) + tmp5;
+
+ // Final output stage: descale and write column
+ outptr[0*output_stride]+= DESCALE(tmp0 + tmp7, 3);
+ outptr[1*output_stride]+= DESCALE(tmp1 + tmp6, 3);
+ outptr[2*output_stride]+= DESCALE(tmp2 + tmp5, 3);
+ outptr[3*output_stride]+= DESCALE(tmp3 - tmp4, 3);
+ outptr[4*output_stride]+= DESCALE(tmp3 + tmp4, 3);
+ outptr[5*output_stride]+= DESCALE(tmp2 - tmp5, 3);
+ outptr[6*output_stride]+= DESCALE(tmp1 - tmp6, 3); //no += ?
+ outptr[7*output_stride]+= DESCALE(tmp0 - tmp7, 3); //no += ?
+ outptr++;
+
+ wsptr += DCTSIZE; // advance pointer to next row
+ }
+ }
+
+#else /* HAVE_MMX */
+/*
+ * MMX implementation of the inverse transform pass; compiled when HAVE_MMX.
+ *
+ * Register/operand roles, as set up by the constraint list at the end:
+ *   REG_S (%0) = workspace, advanced by DCTSIZE_S*2*4 bytes per iteration
+ *   REG_D (%1) = output_adr, advanced by 8 bytes per iteration
+ *   REG_c (%2) = cnt, decremented once per iteration until it reaches zero
+ *   %3         = temps, memory scratch used to park t0 (0*8) and t3 (1*8)
+ *   REG_a      = output_stride*sizeof(short), i.e. the output row stride in bytes
+ *   REG_d      = 3*REG_a, computed by the leading lea (listed as a clobber)
+ *
+ * Each iteration loads two 4x4 blocks of words, transposes them with
+ * punpck* instructions, and adds the descaled results (MM_DESCALE_RND, then
+ * psraw 3) to the eight output rows already stored at REG_D.
+ *
+ * NOTE(review): no emms is issued in this block; presumably the caller is
+ * responsible for it - confirm.
+ */
+static void row_idct_mmx (int16_t* workspace,
+ int16_t* output_adr, int output_stride, int cnt)
+{
+ DECLARE_ALIGNED(8, uint64_t, temps)[4];
+ __asm__ volatile(
+ "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
+ "1: \n\t"
+ "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm0 \n\t"
+ //
+
+ "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm1 \n\t"
+ "movq %%mm0, %%mm4 \n\t"
+
+ "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
+ "punpcklwd %%mm1, %%mm0 \n\t"
+
+ "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm3 \n\t"
+ "punpckhwd %%mm1, %%mm4 \n\t"
+
+ //transpose 4x4
+ "movq %%mm2, %%mm7 \n\t"
+ "punpcklwd %%mm3, %%mm2 \n\t"
+
+ "movq %%mm0, %%mm6 \n\t"
+ "punpckldq %%mm2, %%mm0 \n\t" //0
+
+ "punpckhdq %%mm2, %%mm6 \n\t" //1
+ "movq %%mm0, %%mm5 \n\t"
+
+ "punpckhwd %%mm3, %%mm7 \n\t"
+ "psubw %%mm6, %%mm0 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm0 \n\t"
+ "movq %%mm4, %%mm2 \n\t"
+
+ "punpckldq %%mm7, %%mm4 \n\t" //2
+ "paddw %%mm6, %%mm5 \n\t"
+
+ "punpckhdq %%mm7, %%mm2 \n\t" //3
+ "movq %%mm4, %%mm1 \n\t"
+
+ "psllw $2, %%mm0 \n\t"
+ "paddw %%mm2, %%mm4 \n\t" //t10
+
+ "movq "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_S"), %%mm3 \n\t"
+ "psubw %%mm2, %%mm1 \n\t" //t11
+
+ "movq "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_S"), %%mm2 \n\t"
+ "psubw %%mm5, %%mm0 \n\t"
+
+ "movq %%mm4, %%mm6 \n\t"
+ "paddw %%mm5, %%mm4 \n\t" //t0
+
+ "psubw %%mm5, %%mm6 \n\t" //t3
+ "movq %%mm1, %%mm7 \n\t"
+
+ "movq "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_S"), %%mm5 \n\t"
+ "paddw %%mm0, %%mm1 \n\t" //t1
+
+ "movq %%mm4, 0*8+%3 \n\t" //t0
+ "movq %%mm3, %%mm4 \n\t"
+
+ "movq %%mm6, 1*8+%3 \n\t" //t3
+ "punpcklwd %%mm2, %%mm3 \n\t"
+
+ //transpose 4x4
+ "movq "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_S"), %%mm6 \n\t"
+ "punpckhwd %%mm2, %%mm4 \n\t"
+
+ "movq %%mm5, %%mm2 \n\t"
+ "punpcklwd %%mm6, %%mm5 \n\t"
+
+ "psubw %%mm0, %%mm7 \n\t" //t2
+ "punpckhwd %%mm6, %%mm2 \n\t"
+
+ "movq %%mm3, %%mm0 \n\t"
+ "punpckldq %%mm5, %%mm3 \n\t" //4
+
+ "punpckhdq %%mm5, %%mm0 \n\t" //5
+ "movq %%mm4, %%mm5 \n\t"
+
+ //
+ "movq %%mm3, %%mm6 \n\t"
+ "punpckldq %%mm2, %%mm4 \n\t" //6
+
+ "psubw %%mm0, %%mm3 \n\t" //z10
+ "punpckhdq %%mm2, %%mm5 \n\t" //7
+
+ "paddw %%mm0, %%mm6 \n\t" //z13
+ "movq %%mm4, %%mm2 \n\t"
+
+ "movq %%mm3, %%mm0 \n\t"
+ "psubw %%mm5, %%mm4 \n\t" //z12
+
+ "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm0 \n\t" //-
+ "paddw %%mm4, %%mm3 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm3 \n\t" //z5
+ "paddw %%mm5, %%mm2 \n\t" //z11 >
+
+ "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm4 \n\t"
+ "movq %%mm2, %%mm5 \n\t"
+
+ "psubw %%mm6, %%mm2 \n\t"
+ "paddw %%mm6, %%mm5 \n\t" //t7
+
+ "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //t11
+ "paddw %%mm3, %%mm0 \n\t" //t12
+
+ "psllw $3, %%mm0 \n\t"
+ "psubw %%mm3, %%mm4 \n\t" //t10
+
+ "movq 0*8+%3, %%mm6 \n\t"
+ "movq %%mm1, %%mm3 \n\t"
+
+ "psllw $3, %%mm4 \n\t"
+ "psubw %%mm5, %%mm0 \n\t" //t6
+
+ "psllw $3, %%mm2 \n\t"
+ "paddw %%mm0, %%mm1 \n\t" //d1
+
+ "psubw %%mm0, %%mm2 \n\t" //t5
+ "psubw %%mm0, %%mm3 \n\t" //d6
+
+ "paddw %%mm2, %%mm4 \n\t" //t4
+ "movq %%mm7, %%mm0 \n\t"
+
+ "paddw %%mm2, %%mm7 \n\t" //d2
+ "psubw %%mm2, %%mm0 \n\t" //d5
+
+ "movq "MANGLE(MM_DESCALE_RND)", %%mm2 \n\t" //4
+ "psubw %%mm5, %%mm6 \n\t" //d7
+
+ "paddw 0*8+%3, %%mm5 \n\t" //d0
+ "paddw %%mm2, %%mm1 \n\t"
+
+ "paddw %%mm2, %%mm5 \n\t"
+ "psraw $3, %%mm1 \n\t"
+
+ "paddw %%mm2, %%mm7 \n\t"
+ "psraw $3, %%mm5 \n\t"
+
+ "paddw (%%"REG_D"), %%mm5 \n\t"
+ "psraw $3, %%mm7 \n\t"
+
+ "paddw (%%"REG_D",%%"REG_a"), %%mm1 \n\t"
+ "paddw %%mm2, %%mm0 \n\t"
+
+ "paddw (%%"REG_D",%%"REG_a",2), %%mm7 \n\t"
+ "paddw %%mm2, %%mm3 \n\t"
+
+ "movq %%mm5, (%%"REG_D") \n\t"
+ "paddw %%mm2, %%mm6 \n\t"
+
+ "movq %%mm1, (%%"REG_D",%%"REG_a") \n\t"
+ "psraw $3, %%mm0 \n\t"
+
+ "movq %%mm7, (%%"REG_D",%%"REG_a",2) \n\t"
+ "add %%"REG_d", %%"REG_D" \n\t" //3*ls
+
+ "movq 1*8+%3, %%mm5 \n\t" //t3
+ "psraw $3, %%mm3 \n\t"
+
+ "paddw (%%"REG_D",%%"REG_a",2), %%mm0 \n\t"
+ "psubw %%mm4, %%mm5 \n\t" //d3
+
+ "paddw (%%"REG_D",%%"REG_d"), %%mm3 \n\t"
+ "psraw $3, %%mm6 \n\t"
+
+ "paddw 1*8+%3, %%mm4 \n\t" //d4
+ "paddw %%mm2, %%mm5 \n\t"
+
+ "paddw (%%"REG_D",%%"REG_a",4), %%mm6 \n\t"
+ "paddw %%mm2, %%mm4 \n\t"
+
+ "movq %%mm0, (%%"REG_D",%%"REG_a",2) \n\t"
+ "psraw $3, %%mm5 \n\t"
+
+ "paddw (%%"REG_D"), %%mm5 \n\t"
+ "psraw $3, %%mm4 \n\t"
+
+ "paddw (%%"REG_D",%%"REG_a"), %%mm4 \n\t"
+ "add $"DCTSIZE_S"*2*4, %%"REG_S" \n\t" //4 rows
+
+ "movq %%mm3, (%%"REG_D",%%"REG_d") \n\t"
+ "movq %%mm6, (%%"REG_D",%%"REG_a",4) \n\t"
+ "movq %%mm5, (%%"REG_D") \n\t"
+ "movq %%mm4, (%%"REG_D",%%"REG_a") \n\t"
+
+ "sub %%"REG_d", %%"REG_D" \n\t"
+ "add $8, %%"REG_D" \n\t"
+ "dec %%"REG_c" \n\t"
+ "jnz 1b \n\t"
+
+ : "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=o"(temps)
+ : "a"(output_stride*sizeof(short))
+ NAMED_CONSTRAINTS_ADD(MM_FIX_1_414213562_A,MM_FIX_2_613125930,MM_FIX_1_847759065,MM_FIX_1_082392200,
+ MM_FIX_1_414213562,MM_DESCALE_RND)
+ : "%"REG_d
+ );
+}
+
+#endif // HAVE_MMX
+
+#if !HAVE_MMX
+/*
+ * Scalar (non-MMX) forward transform pass; compiled only when !HAVE_MMX.
+ *
+ * Runs cnt*4 iterations. Each iteration reads eight vertically adjacent
+ * pixels pixels[line_size*k], k = 0..7, from one pixel column and writes
+ * eight int16_t results to dataptr[0..7]. It then steps one pixel to the
+ * right and DCTSIZE elements forward in data.
+ *
+ * data      - output coefficients
+ * pixels    - 8-bit input samples (only read here)
+ * line_size - distance between input rows, in bytes
+ * cnt       - number of groups of four columns to process
+ */
+static void row_fdct_c(int16_t *data, const uint8_t *pixels, int line_size, int cnt)
+{
+ int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int_simd16_t tmp10, tmp11, tmp12, tmp13;
+ int_simd16_t z1, z2, z3, z4, z5, z11, z13;
+ int16_t *dataptr;
+
+ cnt*=4; // cnt counts groups of four columns
+ // Pass 1: process rows.
+
+ dataptr = data;
+ for (; cnt > 0; cnt--) {
+ tmp0 = pixels[line_size*0] + pixels[line_size*7];
+ tmp7 = pixels[line_size*0] - pixels[line_size*7];
+ tmp1 = pixels[line_size*1] + pixels[line_size*6];
+ tmp6 = pixels[line_size*1] - pixels[line_size*6];
+ tmp2 = pixels[line_size*2] + pixels[line_size*5];
+ tmp5 = pixels[line_size*2] - pixels[line_size*5];
+ tmp3 = pixels[line_size*3] + pixels[line_size*4];
+ tmp4 = pixels[line_size*3] - pixels[line_size*4];
+
+ // Even part
+
+ tmp10 = tmp0 + tmp3;
+ tmp13 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp1 - tmp2;
+ //Even columns are written first; this leads to a different order of columns
+ //in column_fidct(), but they are processed independently, so all is OK.
+ //Later, row_idct() reads the columns in the same order.
+ dataptr[2] = tmp10 + tmp11;
+ dataptr[3] = tmp10 - tmp11;
+
+ z1 = MULTIPLY16H((tmp12 + tmp13)<<2, FIX_0_707106781);
+ dataptr[0] = tmp13 + z1;
+ dataptr[1] = tmp13 - z1;
+
+ // Odd part
+
+ tmp10 = (tmp4 + tmp5) <<2; // tmp10..tmp12 are reused for the odd part from here on
+ tmp11 = (tmp5 + tmp6) <<2;
+ tmp12 = (tmp6 + tmp7) <<2;
+
+ z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
+ z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
+ z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
+ z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
+
+ z11 = tmp7 + z3;
+ z13 = tmp7 - z3;
+
+ dataptr[4] = z13 + z2;
+ dataptr[5] = z13 - z2;
+ dataptr[6] = z11 + z4;
+ dataptr[7] = z11 - z4;
+
+ pixels++; // advance pointer to next column
+ dataptr += DCTSIZE;
+ }
+}
+
+#else /* HAVE_MMX */
+/*
+ * MMX implementation of the forward transform pass; compiled when HAVE_MMX.
+ *
+ * Register/operand roles, as set up by the constraint list at the end:
+ *   REG_S (%0) = pixels, advanced by 4 bytes per iteration
+ *   REG_D (%1) = data, advanced by DCTSIZE_S*2*4 bytes per iteration
+ *   REG_c (%2) = cnt, decremented once per iteration until it reaches zero
+ *   %3, %4     = temps and temps[1], memory scratch holding t7 and t6
+ *   REG_a      = line_size
+ *   REG_d      = 3*line_size, computed by the leading lea (listed as a clobber)
+ *
+ * Each iteration loads four adjacent bytes from each of eight input rows
+ * (movd), widens them to words against a zeroed mm7, and stores two
+ * transposed 4x4 word blocks to the output.
+ *
+ * NOTE(review): no emms is issued in this block; presumably the caller is
+ * responsible for it - confirm.
+ */
+static void row_fdct_mmx(int16_t *data, const uint8_t *pixels, int line_size, int cnt)
+{
+ DECLARE_ALIGNED(8, uint64_t, temps)[4];
+ __asm__ volatile(
+ "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
+ "6: \n\t"
+ "movd (%%"REG_S"), %%mm0 \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+
+ "movd (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+
+ "movd (%%"REG_S",%%"REG_a",2), %%mm2 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "add %%"REG_d", %%"REG_S" \n\t"
+
+ "movq %%mm0, %%mm5 \n\t"
+ //
+
+ "movd (%%"REG_S",%%"REG_a",4), %%mm3 \n\t" //7 ;prefetch!
+ "movq %%mm1, %%mm6 \n\t"
+
+ "movd (%%"REG_S",%%"REG_d"), %%mm4 \n\t" //6
+ "punpcklbw %%mm7, %%mm3 \n\t"
+
+ "psubw %%mm3, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+
+ "paddw %%mm3, %%mm0 \n\t"
+ "psubw %%mm4, %%mm6 \n\t"
+
+ "movd (%%"REG_S",%%"REG_a",2), %%mm3 \n\t" //5
+ "paddw %%mm4, %%mm1 \n\t"
+
+ "movq %%mm5, %3 \n\t" //t7
+ "punpcklbw %%mm7, %%mm3 \n\t"
+
+ "movq %%mm6, %4 \n\t" //t6
+ "movq %%mm2, %%mm4 \n\t"
+
+ "movd (%%"REG_S"), %%mm5 \n\t" //3
+ "paddw %%mm3, %%mm2 \n\t"
+
+ "movd (%%"REG_S",%%"REG_a"), %%mm6 \n\t" //4
+ "punpcklbw %%mm7, %%mm5 \n\t"
+
+ "psubw %%mm3, %%mm4 \n\t"
+ "punpcklbw %%mm7, %%mm6 \n\t"
+
+ "movq %%mm5, %%mm3 \n\t"
+ "paddw %%mm6, %%mm5 \n\t" //t3
+
+ "psubw %%mm6, %%mm3 \n\t" //t4 ; t0 t1 t2 t4 t5 t3 - -
+ "movq %%mm0, %%mm6 \n\t"
+
+ "movq %%mm1, %%mm7 \n\t"
+ "psubw %%mm5, %%mm0 \n\t" //t13
+
+ "psubw %%mm2, %%mm1 \n\t"
+ "paddw %%mm2, %%mm7 \n\t" //t11
+
+ "paddw %%mm0, %%mm1 \n\t"
+ "movq %%mm7, %%mm2 \n\t"
+
+ "psllw $2, %%mm1 \n\t"
+ "paddw %%mm5, %%mm6 \n\t" //t10
+
+ "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm1 \n\t"
+ "paddw %%mm6, %%mm7 \n\t" //d2
+
+ "psubw %%mm2, %%mm6 \n\t" //d3
+ "movq %%mm0, %%mm5 \n\t"
+
+ //transpose 4x4
+ "movq %%mm7, %%mm2 \n\t"
+ "punpcklwd %%mm6, %%mm7 \n\t"
+
+ "paddw %%mm1, %%mm0 \n\t" //d0
+ "punpckhwd %%mm6, %%mm2 \n\t"
+
+ "psubw %%mm1, %%mm5 \n\t" //d1
+ "movq %%mm0, %%mm6 \n\t"
+
+ "movq %4, %%mm1 \n\t"
+ "punpcklwd %%mm5, %%mm0 \n\t"
+
+ "punpckhwd %%mm5, %%mm6 \n\t"
+ "movq %%mm0, %%mm5 \n\t"
+
+ "punpckldq %%mm7, %%mm0 \n\t" //0
+ "paddw %%mm4, %%mm3 \n\t"
+
+ "punpckhdq %%mm7, %%mm5 \n\t" //1
+ "movq %%mm6, %%mm7 \n\t"
+
+ "movq %%mm0, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
+ "punpckldq %%mm2, %%mm6 \n\t" //2
+
+ "movq %%mm5, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
+ "punpckhdq %%mm2, %%mm7 \n\t" //3
+
+ "movq %%mm6, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
+ "paddw %%mm1, %%mm4 \n\t"
+
+ "movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
+ "psllw $2, %%mm3 \n\t" //t10
+
+ "movq %3, %%mm2 \n\t"
+ "psllw $2, %%mm4 \n\t" //t11
+
+ "pmulhw "MANGLE(ff_MM_FIX_0_707106781)", %%mm4 \n\t" //z3
+ "paddw %%mm2, %%mm1 \n\t"
+
+ "psllw $2, %%mm1 \n\t" //t12
+ "movq %%mm3, %%mm0 \n\t"
+
+ "pmulhw "MANGLE(ff_MM_FIX_0_541196100)", %%mm0 \n\t"
+ "psubw %%mm1, %%mm3 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t" //z5
+ "movq %%mm2, %%mm5 \n\t"
+
+ "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm1 \n\t"
+ "psubw %%mm4, %%mm2 \n\t" //z13
+
+ "paddw %%mm4, %%mm5 \n\t" //z11
+ "movq %%mm2, %%mm6 \n\t"
+
+ "paddw %%mm3, %%mm0 \n\t" //z2
+ "movq %%mm5, %%mm7 \n\t"
+
+ "paddw %%mm0, %%mm2 \n\t" //d4
+ "psubw %%mm0, %%mm6 \n\t" //d5
+
+ "movq %%mm2, %%mm4 \n\t"
+ "paddw %%mm3, %%mm1 \n\t" //z4
+
+ //transpose 4x4
+ "punpcklwd %%mm6, %%mm2 \n\t"
+ "paddw %%mm1, %%mm5 \n\t" //d6
+
+ "punpckhwd %%mm6, %%mm4 \n\t"
+ "psubw %%mm1, %%mm7 \n\t" //d7
+
+ "movq %%mm5, %%mm6 \n\t"
+ "punpcklwd %%mm7, %%mm5 \n\t"
+
+ "punpckhwd %%mm7, %%mm6 \n\t"
+ "movq %%mm2, %%mm7 \n\t"
+
+ "punpckldq %%mm5, %%mm2 \n\t" //4
+ "sub %%"REG_d", %%"REG_S" \n\t"
+
+ "punpckhdq %%mm5, %%mm7 \n\t" //5
+ "movq %%mm4, %%mm5 \n\t"
+
+ "movq %%mm2, "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_D") \n\t"
+ "punpckldq %%mm6, %%mm4 \n\t" //6
+
+ "movq %%mm7, "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_D") \n\t"
+ "punpckhdq %%mm6, %%mm5 \n\t" //7
+
+ "movq %%mm4, "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_D") \n\t"
+ "add $4, %%"REG_S" \n\t"
+
+ "movq %%mm5, "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_D") \n\t"
+ "add $"DCTSIZE_S"*2*4, %%"REG_D" \n\t" //4 rows
+ "dec %%"REG_c" \n\t"
+ "jnz 6b \n\t"
+
+ : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps), "=o"(temps[1])
+ : "a"(line_size)
+ NAMED_CONSTRAINTS_ADD(ff_MM_FIX_0_707106781,ff_MM_FIX_0_541196100,MM_FIX_0_382683433,MM_FIX_1_306562965)
+ : "%"REG_d);
+}
+
+#endif // HAVE_MMX
diff --git a/libavfilter/libmpcodecs/vf_ilpack.c b/libavfilter/libmpcodecs/vf_ilpack.c
new file mode 100644
index 0000000..fbf5817
--- /dev/null
+++ b/libavfilter/libmpcodecs/vf_ilpack.c
@@ -0,0 +1,458 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "config.h"
+#include "mp_msg.h"
+#include "cpudetect.h"
+
+#include "img_format.h"
+#include "mp_image.h"
+#include "vf.h"
+#include "libavutil/attributes.h"
+#include "libavutil/x86/asm.h"
+/*
+ * Signature shared by every row packer in this file: write one packed
+ * output row to dst from one luma row y and the chroma rows u and v.
+ * w is the row width in luma samples. us and vs are chroma offsets that
+ * the interpolating packers double to reach the second chroma row they
+ * blend with; the nearest-neighbour packers ignore them.
+ */
+typedef void (pack_func_t)(unsigned char *dst, unsigned char *y,
+ unsigned char *u, unsigned char *v, int w, int us, int vs);
+/* Per-instance state of the ilpack filter. */
+struct vf_priv_s {
+ int mode; /* 0 = nearest-neighbour chroma, 1 = linear; vf_open() falls back to linear for other values */
+ pack_func_t *pack[2]; /* the two row packers ilpack() selects between for interior rows */
+};
+
+/*
+ * Nearest-neighbour packer: emits w/2 groups of Y U Y V bytes, consuming two
+ * luma samples and one sample from each chroma row per group. The chroma
+ * offset arguments exist only to match pack_func_t and are not used.
+ */
+static void pack_nn_C(unsigned char *dst, unsigned char *y,
+                      unsigned char *u, unsigned char *v, int w,
+                      int av_unused us, int av_unused vs)
+{
+    int groups = w/2;
+
+    while (groups) {
+        dst[0] = y[0];
+        dst[1] = *u++;
+        dst[2] = y[1];
+        dst[3] = *v++;
+        dst += 4;
+        y   += 2;
+        groups--;
+    }
+}
+
+/*
+ * Linear-interpolating packer, weight set 0: emits w/2 groups of Y U Y V
+ * bytes where each chroma byte is (1*far + 7*near) >> 3. "near" is the
+ * current chroma sample and "far" is the sample 2*us (resp. 2*vs) bytes away.
+ */
+static void pack_li_0_C(unsigned char *dst, unsigned char *y,
+                        unsigned char *u, unsigned char *v, int w, int us, int vs)
+{
+    int groups;
+
+    for (groups = w/2; groups != 0; groups--, u++, v++, y += 2, dst += 4) {
+        dst[0] = y[0];
+        dst[1] = (u[us+us] + 7*u[0]) >> 3;
+        dst[2] = y[1];
+        dst[3] = (v[vs+vs] + 7*v[0]) >> 3;
+    }
+}
+
+/*
+ * Linear-interpolating packer, weight set 1: emits w/2 groups of Y U Y V
+ * bytes where each chroma byte is (3*far + 5*near) >> 3. "near" is the
+ * current chroma sample and "far" is the sample 2*us (resp. 2*vs) bytes away.
+ */
+static void pack_li_1_C(unsigned char *dst, unsigned char *y,
+                        unsigned char *u, unsigned char *v, int w, int us, int vs)
+{
+    int groups;
+
+    for (groups = w/2; groups != 0; groups--, u++, v++, y += 2, dst += 4) {
+        dst[0] = y[0];
+        dst[1] = (3*u[us+us] + 5*u[0]) >> 3;
+        dst[2] = y[1];
+        dst[3] = (3*v[vs+vs] + 5*v[0]) >> 3;
+    }
+}
+
+#if HAVE_MMX
+/*
+ * MMX nearest-neighbour packer. The assembly loop handles w/8 groups of
+ * 8 luma + 4 U + 4 V samples (16 output bytes per group); pack_nn_C then
+ * packs the remaining (w&7) luma samples.
+ *
+ * The pointers and the group counter are read-write ("+r") operands because
+ * the loop advances them. This keeps the asm within the compiler's rules and
+ * guarantees that the tail call continues where the loop stopped instead of
+ * depending on how the compiler allocated the registers.
+ *
+ * The loop is entered only when there is at least one full group, since it
+ * decrements before testing and would otherwise run with a wrapped counter.
+ *
+ * Each iteration loads 8 bytes from u and from v but only uses the low 4.
+ */
+static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
+                        unsigned char *u, unsigned char *v, int w,
+                        int av_unused us, int av_unused vs)
+{
+    int groups = w/8;
+
+    if (groups > 0) {
+        __asm__ volatile (""
+            ASMALIGN(4)
+            "1: \n\t"
+            "movq (%0), %%mm1 \n\t"
+            "movq (%0), %%mm2 \n\t"
+            "movq (%1), %%mm4 \n\t"
+            "movq (%2), %%mm6 \n\t"
+            "punpcklbw %%mm6, %%mm4 \n\t"
+            "punpcklbw %%mm4, %%mm1 \n\t"
+            "punpckhbw %%mm4, %%mm2 \n\t"
+
+            "add $8, %0 \n\t"
+            "add $4, %1 \n\t"
+            "add $4, %2 \n\t"
+            "movq %%mm1, (%3) \n\t"
+            "movq %%mm2, 8(%3) \n\t"
+            "add $16, %3 \n\t"
+            "decl %4 \n\t"
+            "jnz 1b \n\t"
+            "emms \n\t"
+            : "+r" (y), "+r" (u), "+r" (v), "+r" (dst), "+r" (groups)
+            :
+            : "memory"
+        );
+    }
+    pack_nn_C(dst, y, u, v, (w&7), 0, 0);
+}
+
+#if HAVE_EBX_AVAILABLE
+static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
+ unsigned char *u, unsigned char *v, int w, int us, int vs)
+{ /*
+ * MMX counterpart of pack_li_0_C. Per chroma sample it loads the sample at
+ * offset 2*us (resp. 2*vs), adds the current sample seven times and shifts
+ * right by 3, i.e. (far + 7*near) >> 3, then interleaves the result with luma.
+ *
+ * Each loop iteration consumes 16 luma bytes and 8 bytes from each chroma
+ * row and writes 32 output bytes; the loop count is w/16 and the trailing
+ * (w&15) samples are handed to pack_li_0_C.
+ *
+ * Register roles: REG_S = y, REG_D = dst, REG_a = u, REG_b = v, ecx = count,
+ * REG_d = us, REG_BP = vs (saved and restored with push/pop). On x86-64 vs
+ * arrives as operand %6; on 32-bit builds REG_d initially holds &us and the
+ * code loads us from (REG_d) and vs from 4(REG_d), which relies on vs being
+ * stored directly after us in memory.
+ *
+ * NOTE(review): the asm modifies registers bound to input-only operands, so
+ * the pointer values seen by the tail call below depend on compiler register
+ * allocation - confirm.
+ * NOTE(review): the counter is decremented before it is tested, so it looks
+ * like w < 16 would run the loop with a wrapped count - confirm callers
+ * never pass such widths.
+ */
+ __asm__ volatile (""
+ "push %%"REG_BP" \n\t"
+#if ARCH_X86_64
+ "mov %6, %%"REG_BP" \n\t"
+#else
+ "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
+ "movl (%%"REG_d"), %%"REG_d" \n\t"
+#endif
+ "pxor %%mm0, %%mm0 \n\t"
+
+ ASMALIGN(4)
+ "2: \n\t"
+ "movq (%%"REG_S"), %%mm1 \n\t"
+ "movq (%%"REG_S"), %%mm2 \n\t"
+
+ "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
+ "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
+ "punpcklbw %%mm0, %%mm4 \n\t"
+ "punpcklbw %%mm0, %%mm6 \n\t"
+ "movq (%%"REG_a"), %%mm3 \n\t"
+ "movq (%%"REG_b"), %%mm5 \n\t"
+ "punpcklbw %%mm0, %%mm3 \n\t"
+ "punpcklbw %%mm0, %%mm5 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "psrlw $3, %%mm4 \n\t"
+ "psrlw $3, %%mm6 \n\t"
+ "packuswb %%mm4, %%mm4 \n\t"
+ "packuswb %%mm6, %%mm6 \n\t"
+ "punpcklbw %%mm6, %%mm4 \n\t"
+ "punpcklbw %%mm4, %%mm1 \n\t"
+ "punpckhbw %%mm4, %%mm2 \n\t"
+
+ "movq %%mm1, (%%"REG_D") \n\t"
+ "movq %%mm2, 8(%%"REG_D") \n\t"
+
+ "movq 8(%%"REG_S"), %%mm1 \n\t"
+ "movq 8(%%"REG_S"), %%mm2 \n\t"
+
+ "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
+ "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
+ "punpckhbw %%mm0, %%mm4 \n\t"
+ "punpckhbw %%mm0, %%mm6 \n\t"
+ "movq (%%"REG_a"), %%mm3 \n\t"
+ "movq (%%"REG_b"), %%mm5 \n\t"
+ "punpckhbw %%mm0, %%mm3 \n\t"
+ "punpckhbw %%mm0, %%mm5 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "psrlw $3, %%mm4 \n\t"
+ "psrlw $3, %%mm6 \n\t"
+ "packuswb %%mm4, %%mm4 \n\t"
+ "packuswb %%mm6, %%mm6 \n\t"
+ "punpcklbw %%mm6, %%mm4 \n\t"
+ "punpcklbw %%mm4, %%mm1 \n\t"
+ "punpckhbw %%mm4, %%mm2 \n\t"
+
+ "add $16, %%"REG_S" \n\t"
+ "add $8, %%"REG_a" \n\t"
+ "add $8, %%"REG_b" \n\t"
+
+ "movq %%mm1, 16(%%"REG_D") \n\t"
+ "movq %%mm2, 24(%%"REG_D") \n\t"
+ "add $32, %%"REG_D" \n\t"
+
+ "decl %%ecx \n\t"
+ "jnz 2b \n\t"
+ "emms \n\t"
+ "pop %%"REG_BP" \n\t"
+ :
+ : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
+#if ARCH_X86_64
+ "d" ((x86_reg)us), "r" ((x86_reg)vs)
+#else
+ "d" (&us)
+#endif
+ : "memory"
+ );
+ pack_li_0_C(dst, y, u, v, (w&15), us, vs);
+}
+/*
+ * MMX counterpart of pack_li_1_C. Per chroma sample it loads the sample at
+ * offset 2*us (resp. 2*vs), triples it (copy, double, add), adds the current
+ * sample five times and shifts right by 3, i.e. (3*far + 5*near) >> 3, then
+ * interleaves the result with luma.
+ *
+ * Each loop iteration consumes 16 luma bytes and 8 bytes from each chroma
+ * row and writes 32 output bytes; the loop count is w/16 and the trailing
+ * (w&15) samples are handed to pack_li_1_C.
+ *
+ * Register roles: REG_S = y, REG_D = dst, REG_a = u, REG_b = v, ecx = count,
+ * REG_d = us, REG_BP = vs (saved and restored with push/pop). On x86-64 vs
+ * arrives as operand %6; on 32-bit builds REG_d initially holds &us and the
+ * code loads us from (REG_d) and vs from 4(REG_d), which relies on vs being
+ * stored directly after us in memory.
+ *
+ * NOTE(review): the asm modifies registers bound to input-only operands, so
+ * the pointer values seen by the tail call below depend on compiler register
+ * allocation - confirm.
+ * NOTE(review): the counter is decremented before it is tested, so it looks
+ * like w < 16 would run the loop with a wrapped count - confirm callers
+ * never pass such widths.
+ */
+static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
+ unsigned char *u, unsigned char *v, int w, int us, int vs)
+{
+ __asm__ volatile (""
+ "push %%"REG_BP" \n\t"
+#if ARCH_X86_64
+ "mov %6, %%"REG_BP" \n\t"
+#else
+ "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
+ "movl (%%"REG_d"), %%"REG_d" \n\t"
+#endif
+ "pxor %%mm0, %%mm0 \n\t"
+
+ ASMALIGN(4)
+ "3: \n\t"
+ "movq (%%"REG_S"), %%mm1 \n\t"
+ "movq (%%"REG_S"), %%mm2 \n\t"
+
+ "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
+ "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
+ "punpcklbw %%mm0, %%mm4 \n\t"
+ "punpcklbw %%mm0, %%mm6 \n\t"
+ "movq (%%"REG_a"), %%mm3 \n\t"
+ "movq (%%"REG_b"), %%mm5 \n\t"
+ "punpcklbw %%mm0, %%mm3 \n\t"
+ "punpcklbw %%mm0, %%mm5 \n\t"
+ "movq %%mm4, %%mm7 \n\t"
+ "paddw %%mm4, %%mm4 \n\t"
+ "paddw %%mm7, %%mm4 \n\t"
+ "movq %%mm6, %%mm7 \n\t"
+ "paddw %%mm6, %%mm6 \n\t"
+ "paddw %%mm7, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "psrlw $3, %%mm4 \n\t"
+ "psrlw $3, %%mm6 \n\t"
+ "packuswb %%mm4, %%mm4 \n\t"
+ "packuswb %%mm6, %%mm6 \n\t"
+ "punpcklbw %%mm6, %%mm4 \n\t"
+ "punpcklbw %%mm4, %%mm1 \n\t"
+ "punpckhbw %%mm4, %%mm2 \n\t"
+
+ "movq %%mm1, (%%"REG_D") \n\t"
+ "movq %%mm2, 8(%%"REG_D") \n\t"
+
+ "movq 8(%%"REG_S"), %%mm1 \n\t"
+ "movq 8(%%"REG_S"), %%mm2 \n\t"
+
+ "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
+ "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
+ "punpckhbw %%mm0, %%mm4 \n\t"
+ "punpckhbw %%mm0, %%mm6 \n\t"
+ "movq (%%"REG_a"), %%mm3 \n\t"
+ "movq (%%"REG_b"), %%mm5 \n\t"
+ "punpckhbw %%mm0, %%mm3 \n\t"
+ "punpckhbw %%mm0, %%mm5 \n\t"
+ "movq %%mm4, %%mm7 \n\t"
+ "paddw %%mm4, %%mm4 \n\t"
+ "paddw %%mm7, %%mm4 \n\t"
+ "movq %%mm6, %%mm7 \n\t"
+ "paddw %%mm6, %%mm6 \n\t"
+ "paddw %%mm7, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "paddw %%mm3, %%mm4 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "psrlw $3, %%mm4 \n\t"
+ "psrlw $3, %%mm6 \n\t"
+ "packuswb %%mm4, %%mm4 \n\t"
+ "packuswb %%mm6, %%mm6 \n\t"
+ "punpcklbw %%mm6, %%mm4 \n\t"
+ "punpcklbw %%mm4, %%mm1 \n\t"
+ "punpckhbw %%mm4, %%mm2 \n\t"
+
+ "add $16, %%"REG_S" \n\t"
+ "add $8, %%"REG_a" \n\t"
+ "add $8, %%"REG_b" \n\t"
+
+ "movq %%mm1, 16(%%"REG_D") \n\t"
+ "movq %%mm2, 24(%%"REG_D") \n\t"
+ "add $32, %%"REG_D" \n\t"
+
+ "decl %%ecx \n\t"
+ "jnz 3b \n\t"
+ "emms \n\t"
+ "pop %%"REG_BP" \n\t"
+ :
+ : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
+#if ARCH_X86_64
+ "d" ((x86_reg)us), "r" ((x86_reg)vs)
+#else
+ "d" (&us)
+#endif
+ : "memory"
+ );
+ pack_li_1_C(dst, y, u, v, (w&15), us, vs);
+}
+#endif /* HAVE_EBX_AVAILABLE */
+#endif
+/*
+ * File-wide packer selections. vf_open() points these at the C versions and
+ * then, when MMX is available, at the MMX versions. They are shared by every
+ * instance of the filter.
+ */
+static pack_func_t *pack_nn;
+static pack_func_t *pack_li_0;
+static pack_func_t *pack_li_1;
+/*
+ * Packs a planar image with three source planes into one packed destination
+ * plane, one output row per luma row.
+ *
+ * The first two and the last two rows are packed with pack_nn. Every row in
+ * between is packed with pack[0] or pack[1], which receive signed chroma
+ * strides so that they can blend the current chroma row with a second one.
+ *
+ * dst, dststride - destination plane and its row stride
+ * src, srcstride - the Y, U and V planes and their row strides
+ * w, h           - image width and height in luma samples / rows
+ * pack           - the two packers used for interior rows
+ *
+ * NOTE(review): four pack_nn rows are always written, so this presumably
+ * expects h >= 4 - confirm against callers.
+ */
+static void ilpack(unsigned char *dst, unsigned char *src[3],
+ int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2])
+{
+ int i;
+ unsigned char *y, *u, *v;
+ int ys = srcstride[0], us = srcstride[1], vs = srcstride[2];
+ int a, b;
+
+ y = src[0];
+ u = src[1];
+ v = src[2];
+
+ pack_nn(dst, y, u, v, w, 0, 0);
+ y += ys; dst += dststride;
+ pack_nn(dst, y, u+us, v+vs, w, 0, 0); /* second row uses the second chroma row without moving u/v */
+ y += ys; dst += dststride;
+ for (i=2; i<h-2; i++) {
+ a = (i&2) ? 1 : -1; /* sign applied to the chroma strides: +1 when bit 1 of i is set, else -1 */
+ b = (i&1) ^ ((i&2)>>1); /* packer index: bit 0 of i xor bit 1 of i */
+ pack[b](dst, y, u, v, w, us*a, vs*a);
+ y += ys;
+ if ((i&3) == 1) { /* step the chroma pointers back one row when i%4 == 1, forward otherwise */
+ u -= us;
+ v -= vs;
+ } else {
+ u += us;
+ v += vs;
+ }
+ dst += dststride;
+ }
+ pack_nn(dst, y, u, v, w, 0, 0);
+ y += ys; dst += dststride; u += us; v += vs;
+ pack_nn(dst, y, u, v, w, 0, 0);
+}
+
+
+/*
+ * Frame callback: requests a YUY2 image of the input's size from the next
+ * filter, packs the planar input into its first plane with ilpack(), and
+ * passes the packed image on with the unchanged pts.
+ */
+static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
+{
+    // hope we'll get DR buffer:
+    mp_image_t *packed = ff_vf_get_image(vf->next, IMGFMT_YUY2,
+                                         MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
+                                         mpi->w, mpi->h);
+
+    ilpack(packed->planes[0], mpi->planes,
+           packed->stride[0], mpi->stride,
+           mpi->w, mpi->h, vf->priv->pack);
+
+    return ff_vf_next_put_image(vf, packed, pts);
+}
+
+/*
+ * Configuration callback: forwards the geometry and flags unchanged to the
+ * next filter but always announces YUY2 as the output format, ignoring the
+ * format that was passed in.
+ */
+static int config(struct vf_instance *vf,
+                  int width, int height, int d_width, int d_height,
+                  unsigned int flags, unsigned int outfmt)
+{
+    /* FIXME - also support UYVY output? */
+    const unsigned int packed_fmt = IMGFMT_YUY2;
+
+    return ff_vf_next_config(vf, width, height, d_width, d_height, flags, packed_fmt);
+}
+
+
+/*
+ * Format negotiation: for YV12, IYUV and I420 input, returns whatever the
+ * next filter answers for YUY2; every other input format is rejected with 0.
+ */
+static int query_format(struct vf_instance *vf, unsigned int fmt)
+{
+    /* FIXME - really any YUV 4:2:0 input format should work */
+    if (fmt == IMGFMT_YV12 || fmt == IMGFMT_IYUV || fmt == IMGFMT_I420)
+        return ff_vf_next_query_format(vf,IMGFMT_YUY2);
+
+    return 0;
+}
+
+/*
+ * Filter entry point: installs the ilpack callbacks, allocates the private
+ * state and selects the row packers.
+ *
+ * args, when non-NULL, is parsed as a decimal mode number: 0 selects
+ * nearest-neighbour chroma and 1 (the default) linear interpolation; any
+ * other value logs a warning and falls back to linear.
+ *
+ * Returns 1 on success and 0 when the private state cannot be allocated.
+ */
+static int vf_open(vf_instance_t *vf, char *args)
+{
+    vf->config=config;
+    vf->query_format=query_format;
+    vf->put_image=put_image;
+    vf->priv = calloc(1, sizeof(struct vf_priv_s));
+    if (!vf->priv)
+        return 0; /* allocation failed: do not dereference a NULL priv */
+    vf->priv->mode = 1;
+    if (args) sscanf(args, "%d", &vf->priv->mode);
+
+    /* Start from the portable C packers and upgrade when MMX is usable. */
+    pack_nn = pack_nn_C;
+    pack_li_0 = pack_li_0_C;
+    pack_li_1 = pack_li_1_C;
+#if HAVE_MMX
+    if(ff_gCpuCaps.hasMMX) {
+        pack_nn = pack_nn_MMX;
+#if HAVE_EBX_AVAILABLE
+        pack_li_0 = pack_li_0_MMX;
+        pack_li_1 = pack_li_1_MMX;
+#endif
+    }
+#endif
+
+    switch(vf->priv->mode) {
+    case 0:
+        vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;
+        break;
+    default:
+        ff_mp_msg(MSGT_VFILTER, MSGL_WARN,
+            "ilpack: unknown mode %d (fallback to linear)\n",
+            vf->priv->mode);
+        /* Fallthrough */
+    case 1:
+        vf->priv->pack[0] = pack_li_0;
+        vf->priv->pack[1] = pack_li_1;
+        break;
+    }
+
+    return 1;
+}
+/*
+ * Registration record for the ilpack filter. The field order is defined by
+ * vf_info_t, which is declared outside this file; presumably the strings are
+ * description, name, author and comment, followed by the open callback and
+ * an options pointer - confirm against vf.h.
+ */
+const vf_info_t ff_vf_info_ilpack = {
+ "4:2:0 planar -> 4:2:2 packed reinterlacer",
+ "ilpack",
+ "Richard Felker",
+ "",
+ vf_open,
+ NULL
+};
diff --git a/libavfilter/libmpcodecs/vf_pp7.c b/libavfilter/libmpcodecs/vf_pp7.c
new file mode 100644
index 0000000..89ed4fe
--- /dev/null
+++ b/libavfilter/libmpcodecs/vf_pp7.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (C) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <math.h>
+
+#include "config.h"
+
+#include "mp_msg.h"
+#include "cpudetect.h"
+
+#if HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+#include "libavutil/mem.h"
+
+#include "img_format.h"
+#include "mp_image.h"
+#include "vf.h"
+#include "libvo/fastmemcpy.h"
+/* Minimum / maximum of two values. Each argument may be evaluated twice, so avoid side effects. */
+#define XMIN(a,b) ((a) < (b) ? (a) : (b))
+#define XMAX(a,b) ((a) > (b) ? (a) : (b))
+
+//===========================================================================//
+DECLARE_ALIGNED(8, static const uint8_t, dither)[8][8] = { /* 8x8 table of the values 0..63; filter() adds dither[y&7][x&7] before its final >>6 */
+{ 0, 48, 12, 60, 3, 51, 15, 63, },
+{ 32, 16, 44, 28, 35, 19, 47, 31, },
+{ 8, 56, 4, 52, 11, 59, 7, 55, },
+{ 40, 24, 36, 20, 43, 27, 39, 23, },
+{ 2, 50, 14, 62, 1, 49, 13, 61, },
+{ 34, 18, 46, 30, 33, 17, 45, 29, },
+{ 10, 58, 6, 54, 9, 57, 5, 53, },
+{ 42, 26, 38, 22, 41, 25, 37, 21, },
+};
+/* Per-instance state of the pp7 filter. */
+struct vf_priv_s {
+ int qp; /* when non-zero, filter() uses this value instead of the per-block values in qp_store */
+ int mode; /* not referenced in the part of the file visible here */
+ int mpeg2; /* copied from mpi->qscale_type in put_image() and passed to norm_qscale() */
+ int temp_stride; /* row stride of the work buffer for luma: (width+16+15)&~15, set in config() */
+ uint8_t *src; /* work buffer allocated in config(); filter() keeps padded pixels and transform scratch in it */
+};
+#if 0 /* disabled experimental 4-output transform helper, kept for reference only; its last #else branch reads `src`, which this function never declares */
+static inline void dct7_c(int16_t *dst, int s0, int s1, int s2, int s3, int step){
+ int s, d;
+ int dst2[64];
+//#define S0 (1024/0.37796447300922719759)
+#define C0 ((int)(1024*0.37796447300922719759+0.5)) //sqrt(1/7)
+#define C1 ((int)(1024*0.53452248382484879308/6+0.5)) //sqrt(2/7)/6
+
+#define C2 ((int)(1024*0.45221175985034745004/2+0.5))
+#define C3 ((int)(1024*0.36264567479870879474/2+0.5))
+
+//0.1962505182412941918 0.0149276808419397944-0.2111781990832339584
+#define C4 ((int)(1024*0.1962505182412941918+0.5))
+#define C5 ((int)(1024*0.0149276808419397944+0.5))
+//#define C6 ((int)(1024*0.2111781990832339584+0.5))
+#if 0
+ s= s0 + s1 + s2;
+ dst[0*step] = ((s + s3)*C0 + 512) >> 10;
+ s= (s - 6*s3)*C1 + 512;
+ d= (s0-s2)*C4 + (s1-s2)*C5;
+ dst[1*step] = (s + 2*d)>>10;
+ s -= d;
+ d= (s1-s0)*C2 + (s1-s2)*C3;
+ dst[2*step] = (s + d)>>10;
+ dst[3*step] = (s - d)>>10;
+#elif 1
+ s = s3+s3; // same integer butterfly as dctA_c / dctB_c below
+ s3= s-s0;
+ s0= s+s0;
+ s = s2+s1;
+ s2= s2-s1;
+ dst[0*step]= s0 + s;
+ dst[2*step]= s0 - s;
+ dst[1*step]= 2*s3 + s2;
+ dst[3*step]= s3 - 2*s2;
+#else
+ int i,j,n=7;
+ for(i=0; i<7; i+=2){
+ dst2[i*step/2]= 0;
+ for(j=0; j<4; j++)
+ dst2[i*step/2] += src[j*step] * cos(i*M_PI/n*(j+0.5)) * sqrt((i?2.0:1.0)/n);
+ if(fabs(dst2[i*step/2] - dst[i*step/2]) > 20)
+ printf("%d %d %d (%d %d %d %d) -> (%d %d %d %d)\n", i,dst2[i*step/2], dst[i*step/2],src[0*step], src[1*step], src[2*step], src[3*step], dst[0*step], dst[1*step],dst[2*step],dst[3*step]);
+ }
+#endif
+}
+#endif
+
+/*
+ * First transform pass. For each of four adjacent pixel columns it folds
+ * seven vertically adjacent samples (rows 0..6, symmetric around row 3)
+ * into four int16_t outputs, stored contiguously: dst[4*col + 0..3].
+ *
+ * dst    - receives 16 values
+ * src    - top-left sample of the 4x7 input window
+ * stride - distance between input rows, in bytes
+ */
+static inline void dctA_c(int16_t *dst, uint8_t *src, int stride){
+    int col;
+
+    for (col = 0; col < 4; col++, src++, dst += 4) {
+        const int outer   = src[0*stride] + src[6*stride];
+        const int middle  = src[1*stride] + src[5*stride];
+        const int inner   = src[2*stride] + src[4*stride];
+        const int centre2 = src[3*stride] + src[3*stride];
+        const int diff_hi = centre2 - outer;
+        const int sum_hi  = centre2 + outer;
+        const int sum_lo  = inner + middle;
+        const int diff_lo = inner - middle;
+
+        dst[0] = sum_hi + sum_lo;
+        dst[2] = sum_hi - sum_lo;
+        dst[1] = 2*diff_hi + diff_lo;
+        dst[3] = diff_hi - 2*diff_lo;
+    }
+}
+
+/*
+ * Second transform pass. Applies the same 7-in / 4-out butterfly as dctA_c
+ * to four int16_t columns: inputs are src[k*4 + col] for k = 0..6 and
+ * outputs go to dst[k*4 + col] for k = 0..3.
+ */
+static void dctB_c(int16_t *dst, int16_t *src){
+    int col;
+
+    for (col = 0; col < 4; col++, src++, dst++) {
+        const int outer   = src[0*4] + src[6*4];
+        const int middle  = src[1*4] + src[5*4];
+        const int inner   = src[2*4] + src[4*4];
+        const int centre2 = src[3*4] + src[3*4];
+        const int diff_hi = centre2 - outer;
+        const int sum_hi  = centre2 + outer;
+        const int sum_lo  = inner + middle;
+        const int diff_lo = inner - middle;
+
+        dst[0*4] = sum_hi + sum_lo;
+        dst[2*4] = sum_hi - sum_lo;
+        dst[1*4] = 2*diff_hi + diff_lo;
+        dst[3*4] = diff_hi - 2*diff_lo;
+    }
+}
+
+#if HAVE_MMX
+/*
+ * MMX version of dctB_c: processes all four int16_t columns at once, one
+ * 64-bit row per register. Reads rows 0..6 of src (4 words each) and writes
+ * rows 0..3 of dst.
+ *
+ * The "memory" clobber is required because the asm loads from src and stores
+ * to dst through pointer operands only; without it the compiler may assume
+ * that neither buffer is accessed and reorder or cache around the statement.
+ *
+ * No emms is issued here; presumably the caller does that - confirm.
+ */
+static void dctB_mmx(int16_t *dst, int16_t *src){
+    __asm__ volatile (
+        "movq (%0), %%mm0 \n\t"
+        "movq 1*4*2(%0), %%mm1 \n\t"
+        "paddw 6*4*2(%0), %%mm0 \n\t"
+        "paddw 5*4*2(%0), %%mm1 \n\t"
+        "movq 2*4*2(%0), %%mm2 \n\t"
+        "movq 3*4*2(%0), %%mm3 \n\t"
+        "paddw 4*4*2(%0), %%mm2 \n\t"
+        "paddw %%mm3, %%mm3 \n\t" //s
+        "movq %%mm3, %%mm4 \n\t" //s
+        "psubw %%mm0, %%mm3 \n\t" //s-s0
+        "paddw %%mm0, %%mm4 \n\t" //s+s0
+        "movq %%mm2, %%mm0 \n\t" //s2
+        "psubw %%mm1, %%mm2 \n\t" //s2-s1
+        "paddw %%mm1, %%mm0 \n\t" //s2+s1
+        "movq %%mm4, %%mm1 \n\t" //s0'
+        "psubw %%mm0, %%mm4 \n\t" //s0'-s'
+        "paddw %%mm0, %%mm1 \n\t" //s0'+s'
+        "movq %%mm3, %%mm0 \n\t" //s3'
+        "psubw %%mm2, %%mm3 \n\t"
+        "psubw %%mm2, %%mm3 \n\t" //s3'-2*s2'
+        "paddw %%mm0, %%mm2 \n\t"
+        "paddw %%mm0, %%mm2 \n\t" //2*s3'+s2'
+        "movq %%mm1, (%1) \n\t"
+        "movq %%mm4, 2*4*2(%1) \n\t"
+        "movq %%mm2, 1*4*2(%1) \n\t"
+        "movq %%mm3, 3*4*2(%1) \n\t"
+        :: "r" (src), "r"(dst)
+        : "memory"
+    );
+}
+#endif
+/* Second-pass transform used by filter(); defaults to the C version. Nothing in the visible part of the file reassigns it. */
+static void (*dctB)(int16_t *dst, int16_t *src)= dctB_c;
+/*
+ * Constants for the factor[] and thres[] tables below. SN0 and SN2 are the
+ * square roots of N0 and N2 (2*2 = 4, 3.16227766017^2 ~ 10) and SN1 is the
+ * square root of N1 (2.2360679775^2 ~ 5). N is the fixed-point scale, 2^16.
+ */
+#define N0 4
+#define N1 5
+#define N2 10
+#define SN0 2
+#define SN1 2.2360679775
+#define SN2 3.16227766017
+#define N (1<<16)
+/*
+ * Per-coefficient weights, N/(Na*Nb), for the 16 values produced by dctB.
+ * The *thresh_c() functions multiply each kept coefficient by its weight
+ * before their final >>12.
+ */
+static const int factor[16]={
+ N/(N0*N0), N/(N0*N1), N/(N0*N0),N/(N0*N2),
+ N/(N1*N0), N/(N1*N1), N/(N1*N0),N/(N1*N2),
+ N/(N0*N0), N/(N0*N1), N/(N0*N0),N/(N0*N2),
+ N/(N2*N0), N/(N2*N1), N/(N2*N0),N/(N2*N2),
+};
+/*
+ * Static threshold table, N/(SNa*SNb). The initialisers are computed in
+ * floating point and truncated when converted to int. Not referenced in the
+ * part of the file visible here, which uses thres2 instead.
+ */
+static const int thres[16]={
+ N/(SN0*SN0), N/(SN0*SN2), N/(SN0*SN0),N/(SN0*SN2),
+ N/(SN2*SN0), N/(SN2*SN2), N/(SN2*SN0),N/(SN2*SN2),
+ N/(SN0*SN0), N/(SN0*SN2), N/(SN0*SN0),N/(SN0*SN2),
+ N/(SN2*SN0), N/(SN2*SN2), N/(SN2*SN0),N/(SN2*SN2),
+};
+/* Per-qp, per-coefficient thresholds: thres2[qp][i] for qp 0..98, filled in by init_thres2() and read by the *thresh_c() functions. */
+static int thres2[99][16];
+
+/*
+ * Fills thres2 for every qp in 0..98. Entry [qp][i] is the product of two
+ * scale factors chosen by bits 0 and 2 of i (SN2 when the bit is set, SN0
+ * otherwise), max(1, qp) and 4, minus 1 and minus the bias (currently 0),
+ * converted to int.
+ */
+static void init_thres2(void){
+    const int bias= 0; //FIXME
+    int row, col;
+
+    for(row=0; row<99; row++){
+        int *out= thres2[row];
+
+        for(col=0; col<16; col++)
+            out[col]= ((col&1)?SN2:SN0) * ((col&4)?SN2:SN0) * XMAX(1,row) * (1<<2) - 1 - bias;
+    }
+}
+/*
+ * Hard thresholding. Always keeps src[0]; keeps each of the other 15
+ * coefficients unchanged when its magnitude exceeds thres2[qp][i] and drops
+ * it otherwise. Returns the factor-weighted sum, rounded and shifted right
+ * by 12.
+ *
+ * NOTE(review): qp indexes thres2[99][16] without a range check here; the
+ * caller must keep it within 0..98 - confirm.
+ */
+static int hardthresh_c(int16_t *src, int qp){
+ int i;
+ int a;
+
+ a= src[0] * factor[0];
+ for(i=1; i<16; i++){
+ unsigned int threshold1= thres2[qp][i];
+ unsigned int threshold2= (threshold1<<1);
+ int level= src[i];
+ if(((unsigned)(level+threshold1))>threshold2){ // one unsigned compare for level > threshold1 || level < -threshold1
+ a += level * factor[i];
+ }
+ }
+ return (a + (1<<11))>>12;
+}
+/*
+ * Medium thresholding. Always keeps src[0]. For the other 15 coefficients:
+ * magnitudes up to thres2[qp][i] are dropped, magnitudes above twice that
+ * threshold are kept unchanged, and values in between contribute twice their
+ * distance from the threshold. Returns the factor-weighted sum, rounded and
+ * shifted right by 12.
+ *
+ * NOTE(review): qp indexes thres2[99][16] without a range check here; the
+ * caller must keep it within 0..98 - confirm.
+ */
+static int mediumthresh_c(int16_t *src, int qp){
+ int i;
+ int a;
+
+ a= src[0] * factor[0];
+ for(i=1; i<16; i++){
+ unsigned int threshold1= thres2[qp][i];
+ unsigned int threshold2= (threshold1<<1);
+ int level= src[i];
+ if(((unsigned)(level+threshold1))>threshold2){ // one unsigned compare for level > threshold1 || level < -threshold1
+ if(((unsigned)(level+2*threshold1))>2*threshold2){ // same trick with the doubled threshold
+ a += level * factor[i];
+ }else{
+ if(level>0) a+= 2*(level - (int)threshold1)*factor[i];
+ else a+= 2*(level + (int)threshold1)*factor[i];
+ }
+ }
+ }
+ return (a + (1<<11))>>12;
+}
+/*
+ * Soft thresholding. Always keeps src[0]. Each of the other 15 coefficients
+ * is dropped when its magnitude is at most thres2[qp][i]; otherwise it is
+ * moved towards zero by the threshold before being weighted. Returns the
+ * factor-weighted sum, rounded and shifted right by 12.
+ *
+ * NOTE(review): qp indexes thres2[99][16] without a range check here; the
+ * caller must keep it within 0..98 - confirm.
+ */
+static int softthresh_c(int16_t *src, int qp){
+ int i;
+ int a;
+
+ a= src[0] * factor[0];
+ for(i=1; i<16; i++){
+ unsigned int threshold1= thres2[qp][i];
+ unsigned int threshold2= (threshold1<<1);
+ int level= src[i];
+ if(((unsigned)(level+threshold1))>threshold2){ // one unsigned compare for level > threshold1 || level < -threshold1
+ if(level>0) a+= (level - (int)threshold1)*factor[i];
+ else a+= (level + (int)threshold1)*factor[i];
+ }
+ }
+ return (a + (1<<11))>>12;
+}
+/* Thresholding function applied by filter() to each transformed block; defaults to hard thresholding. */
+static int (*requantize)(int16_t *src, int qp)= hardthresh_c;
+/*
+ * Filters one plane from src into dst.
+ *
+ * Steps, all using the work buffer p->src:
+ *  1. copy the plane into the buffer with an 8-pixel mirrored border on the
+ *     left/right of every row and 8 mirrored rows above and below;
+ *  2. for every output pixel run the two transform passes (dctA_c once per
+ *     four columns, dctB per pixel), apply requantize() with the pixel's
+ *     qp, add the dither value, shift right by 6 and clamp to 0..255.
+ *
+ * qp comes from p->qp when that is non-zero, otherwise from qp_store
+ * (indexed at x>>qps, y>>qps with qps = 3 + is_luma) via norm_qscale().
+ * Returns without doing anything when src or dst is NULL.
+ */
+static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src, int dst_stride, int src_stride, int width, int height, uint8_t *qp_store, int qp_stride, int is_luma){
+ int x, y;
+ const int stride= is_luma ? p->temp_stride : ((width+16+15)&(~15));
+ uint8_t *p_src= p->src + 8*stride; // padded pixel area starts 8 rows into the work buffer
+ int16_t *block= (int16_t *)p->src; // dctB output, at the very start of the work buffer
+ int16_t *temp= (int16_t *)(p->src + 32); // dctA output for the current row, 32 bytes in
+
+ if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
+ for(y=0; y<height; y++){
+ int index= 8 + 8*stride + y*stride;
+ fast_memcpy(p_src + index, src + y*src_stride, width);
+ for(x=0; x<8; x++){ // mirror 8 pixels outwards on both sides of the row
+ p_src[index - x - 1]= p_src[index + x ];
+ p_src[index + width + x ]= p_src[index + width - x - 1];
+ }
+ }
+ for(y=0; y<8; y++){ // mirror 8 full rows above and below the image
+ fast_memcpy(p_src + ( 7-y)*stride, p_src + ( y+8)*stride, stride);
+ fast_memcpy(p_src + (height+8+y)*stride, p_src + (height-y+7)*stride, stride);
+ }
+ //FIXME (try edge emu)
+
+ for(y=0; y<height; y++){
+ for(x=-8; x<0; x+=4){ // first-pass results for the two 4-column groups left of x = 0
+ const int index= x + y*stride + (8-3)*(1+stride) + 8; //FIXME silly offset
+ uint8_t *src = p_src + index;
+ int16_t *tp= temp+4*x;
+
+ dctA_c(tp+4*8, src, stride);
+ }
+ for(x=0; x<width; ){
+ const int qps= 3 + is_luma;
+ int qp;
+ int end= XMIN(x+8, width); // qp is looked up once per run of up to 8 pixels
+
+ if(p->qp)
+ qp= p->qp;
+ else{
+ qp= qp_store[ (XMIN(x, width-1)>>qps) + (XMIN(y, height-1)>>qps) * qp_stride];
+ qp=norm_qscale(qp, p->mpeg2);
+ }
+ for(; x<end; x++){
+ const int index= x + y*stride + (8-3)*(1+stride) + 8; //FIXME silly offset
+ uint8_t *src = p_src + index;
+ int16_t *tp= temp+4*x;
+ int v;
+
+ if((x&3)==0) // first pass is shared by four columns
+ dctA_c(tp+4*8, src, stride);
+
+ dctB(block, tp);
+
+ v= requantize(block, qp);
+ v= (v + dither[y&7][x&7])>>6;
+ if((unsigned)v > 255) // out of range: negative v becomes 0; v > 255 becomes -1, stored as 255 (assumes arithmetic right shift of negative ints)
+ v= (-v)>>31;
+ dst[x + y*dst_stride]= v;
+ }
+ }
+ }
+}
+
+/**
+ * Allocate the scratch buffer for the given picture size and configure the
+ * next filter in the chain.
+ *
+ * The row stride and the row count are the dimensions plus 16, rounded up to
+ * a multiple of 16; 8 extra rows are allocated on top of that.
+ *
+ * @return 0 if the buffer cannot be allocated, otherwise the result of
+ *         ff_vf_next_config()
+ */
+static int config(struct vf_instance *vf,
+                  int width, int height, int d_width, int d_height,
+                  unsigned int flags, unsigned int outfmt){
+    struct vf_priv_s *priv= vf->priv;
+    const int h= (height+16+15)&(~15);
+
+    // release a buffer left over from an earlier config() call, if any
+    av_free(priv->src);
+
+    priv->temp_stride= (width+16+15)&(~15);
+    priv->src = av_malloc(priv->temp_stride*(h+8)*sizeof(uint8_t));
+    if(!priv->src)
+        return 0;
+
+    return ff_vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
+}
+
+/**
+ * Direct-rendering hook: fetch an image from the next filter and make mpi
+ * share its planes and strides, unless the caller asked for its buffer to be
+ * preserved.
+ */
+static void get_image(struct vf_instance *vf, mp_image_t *mpi){
+    mp_image_t *target;
+
+    if(mpi->flags&MP_IMGFLAG_PRESERVE)
+        return; // don't change
+
+    // ok, we can do pp in-place (or pp disabled):
+    target= ff_vf_get_image(vf->next, mpi->imgfmt, mpi->type,
+                            mpi->flags | MP_IMGFLAG_READABLE,
+                            mpi->width, mpi->height);
+    vf->dmpi= target;
+
+    mpi->planes[0]= target->planes[0];
+    mpi->stride[0]= target->stride[0];
+    mpi->width    = target->width;
+    if(mpi->flags&MP_IMGFLAG_PLANAR){
+        int plane;
+
+        for(plane=1; plane<3; plane++){
+            mpi->planes[plane]= target->planes[plane];
+            mpi->stride[plane]= target->stride[plane];
+        }
+    }
+    mpi->flags|= MP_IMGFLAG_DIRECT;
+}
+
+/**
+ * Process one frame: filter all three planes when a quantizer is available
+ * (per-frame qscale table or fixed qp option), otherwise copy them unchanged,
+ * then hand the result to the next filter.
+ */
+static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){
+    struct vf_priv_s *priv= vf->priv;
+    mp_image_t *dmpi;
+    int chroma_w, chroma_h;
+    int plane;
+
+    if(!(mpi->flags&MP_IMGFLAG_DIRECT)){
+        // no DR, so get a new image! hope we'll get DR buffer:
+        dmpi=ff_vf_get_image(vf->next,mpi->imgfmt,
+                             MP_IMGTYPE_TEMP,
+                             MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE,
+                             mpi->width,mpi->height);
+        ff_vf_clone_mpi_attributes(dmpi, mpi);
+    }else{
+        dmpi=vf->dmpi;
+    }
+
+    priv->mpeg2= mpi->qscale_type;
+    chroma_w= mpi->w>>mpi->chroma_x_shift;
+    chroma_h= mpi->h>>mpi->chroma_y_shift;
+
+    for(plane=0; plane<3; plane++){
+        const int pw= plane ? chroma_w : mpi->w;
+        const int ph= plane ? chroma_h : mpi->h;
+
+        if(mpi->qscale || priv->qp)
+            filter(priv, dmpi->planes[plane], mpi->planes[plane], dmpi->stride[plane], mpi->stride[plane], pw, ph, mpi->qscale, mpi->qstride, plane==0);
+        else
+            memcpy_pic(dmpi->planes[plane], mpi->planes[plane], pw, ph, dmpi->stride[plane], mpi->stride[plane]);
+    }
+
+#if HAVE_MMX
+    if(ff_gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
+#endif
+#if HAVE_MMX2
+    if(ff_gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
+#endif
+
+    return ff_vf_next_put_image(vf,dmpi, pts);
+}
+
+/**
+ * Release the scratch buffer and the private state; a no-op when the
+ * instance has no private state.
+ */
+static void uninit(struct vf_instance *vf){
+    struct vf_priv_s *priv= vf->priv;
+
+    if(priv){
+        av_free(priv->src);
+        priv->src= NULL;
+
+        free(priv);
+        vf->priv= NULL;
+    }
+}
+
+//===========================================================================//
+/**
+ * Report support for an image format: the formats listed below are passed on
+ * to the next filter for its verdict, everything else is rejected with 0.
+ */
+static int query_format(struct vf_instance *vf, unsigned int fmt){
+    static const unsigned int accepted[]= {
+        IMGFMT_YVU9, IMGFMT_IF09, IMGFMT_YV12, IMGFMT_I420,
+        IMGFMT_IYUV, IMGFMT_CLPL, IMGFMT_Y800, IMGFMT_Y8,
+        IMGFMT_444P, IMGFMT_422P, IMGFMT_411P,
+    };
+    unsigned int k;
+
+    for(k=0; k<sizeof(accepted)/sizeof(accepted[0]); k++){
+        if(fmt == accepted[k])
+            return ff_vf_next_query_format(vf,fmt);
+    }
+    return 0;
+}
+
+static int control(struct vf_instance *vf, int request, void* data){ // handles no request itself; everything is forwarded to the next filter
+ return ff_vf_next_control(vf,request,data);
+}
+
+/**
+ * Set up a pp7 filter instance.
+ *
+ * Installs the callbacks, allocates zeroed private state and parses the
+ * optional "qp:mode" argument string. A negative qp is reset to 0. Mode 0
+ * selects hard, 1 soft and any other value medium thresholding.
+ *
+ * Note that requantize and dctB are file-scope pointers, so the selection
+ * made here applies to every instance.
+ *
+ * @return 1 on success, 0 if the private state cannot be allocated
+ */
+static int vf_open(vf_instance_t *vf, char *args){
+    vf->config=config;
+    vf->put_image=put_image;
+    vf->get_image=get_image;
+    vf->query_format=query_format;
+    vf->uninit=uninit;
+    vf->control= control;
+    // calloc() replaces malloc()+memset(), and the result is checked before use
+    vf->priv=calloc(1, sizeof(struct vf_priv_s));
+    if(!vf->priv)
+        return 0;
+
+    if (args) sscanf(args, "%d:%d", &vf->priv->qp, &vf->priv->mode);
+
+    if(vf->priv->qp < 0)
+        vf->priv->qp = 0;
+
+    init_thres2();
+
+    switch(vf->priv->mode){
+        case 0: requantize= hardthresh_c; break;
+        case 1: requantize= softthresh_c; break;
+        default:
+        case 2: requantize= mediumthresh_c; break;
+    }
+
+#if HAVE_MMX
+    if(ff_gCpuCaps.hasMMX){
+        dctB= dctB_mmx;
+    }
+#endif
+#if 0
+    if(ff_gCpuCaps.hasMMX){
+        switch(vf->priv->mode){
+            case 0: requantize= hardthresh_mmx; break;
+            case 1: requantize= softthresh_mmx; break;
+        }
+    }
+#endif
+
+    return 1;
+}
+
+const vf_info_t ff_vf_info_pp7 = { // filter descriptor; field roles below are presumed from the values - confirm against vf_info_t in vf.h
+ "postprocess 7", // descriptive name
+ "pp7", // short filter name
+ "Michael Niedermayer", // author
+ "", // empty free-text field
+ vf_open, // function that sets up an instance
+ NULL // no additional data supplied
+};
diff --git a/libavfilter/libmpcodecs/vf_softpulldown.c b/libavfilter/libmpcodecs/vf_softpulldown.c
new file mode 100644
index 0000000..556374e
--- /dev/null
+++ b/libavfilter/libmpcodecs/vf_softpulldown.c
@@ -0,0 +1,163 @@
+/*
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "config.h"
+#include "mp_msg.h"
+
+#include "img_format.h"
+#include "mp_image.h"
+#include "vf.h"
+
+#include "libvo/fastmemcpy.h"
+
+struct vf_priv_s { // per-instance state of the softpulldown filter
+ int state; // 0 or 1; becomes 1 once the even rows of a frame have been saved for merging with the next frame
+ long long in; // incremented once per put_image() call
+ long long out; // incremented each time a frame is passed to the next filter
+};
+
+/*
+ * Copy every second row of all planes from mpi into dmpi, starting at row
+ * first_row (0 or 1). Chroma planes are only copied for planar images.
+ */
+static void softpulldown_copy_rows(mp_image_t *dmpi, mp_image_t *mpi, int first_row)
+{
+    int plane;
+
+    my_memcpy_pic(dmpi->planes[0] + first_row*dmpi->stride[0],
+                  mpi->planes[0] + first_row*mpi->stride[0],
+                  mpi->w, mpi->h/2,
+                  dmpi->stride[0]*2, mpi->stride[0]*2);
+
+    if (!(mpi->flags & MP_IMGFLAG_PLANAR))
+        return;
+
+    for (plane = 1; plane < 3; plane++) {
+        my_memcpy_pic(dmpi->planes[plane] + first_row*dmpi->stride[plane],
+                      mpi->planes[plane] + first_row*mpi->stride[plane],
+                      mpi->chroma_width, mpi->chroma_height/2,
+                      dmpi->stride[plane]*2, mpi->stride[plane]*2);
+    }
+}
+
+/*
+ * Expand soft pulldown flags into real frames.
+ *
+ * In state 0 the incoming frame is passed on as is; if it carries the
+ * repeat-first-field flag its even rows are saved in the static image and the
+ * state becomes 1. In state 1 the odd rows of the incoming frame complete the
+ * saved image, which is passed on; then either the incoming frame follows it
+ * (repeat flag set, back to state 0) or its even rows are saved for the next
+ * call. A field-order flag that does not match the state is reported and the
+ * state is flipped first.
+ */
+static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
+{
+    const int fields = mpi->fields;
+    const int top_first = (fields & MP_IMGFIELD_TOP_FIRST) != 0;
+    const int repeat_first = (fields & MP_IMGFIELD_REPEAT_FIRST) != 0;
+    int state = vf->priv->state;
+    int ret = 0;
+    mp_image_t *dmpi;
+
+    dmpi = ff_vf_get_image(vf->next, mpi->imgfmt,
+                           MP_IMGTYPE_STATIC, MP_IMGFLAG_ACCEPT_STRIDE |
+                           MP_IMGFLAG_PRESERVE, mpi->width, mpi->height);
+
+    vf->priv->in++;
+
+    if ((state == 0 && !top_first) || (state == 1 && top_first)) {
+        ff_mp_msg(MSGT_VFILTER, MSGL_WARN,
+                  "softpulldown: Unexpected field flags: state=%d top_field_first=%d repeat_first_field=%d\n",
+                  state, top_first, repeat_first);
+        state ^= 1;
+    }
+
+    if (state == 0) {
+        ret = ff_vf_next_put_image(vf, mpi, MP_NOPTS_VALUE);
+        vf->priv->out++;
+        if (repeat_first) {
+            softpulldown_copy_rows(dmpi, mpi, 0);
+            state = 1;
+        }
+    } else {
+        softpulldown_copy_rows(dmpi, mpi, 1);
+        ret = ff_vf_next_put_image(vf, dmpi, MP_NOPTS_VALUE);
+        vf->priv->out++;
+        if (repeat_first) {
+            ret |= ff_vf_next_put_image(vf, mpi, MP_NOPTS_VALUE);
+            vf->priv->out++;
+            state = 0;
+        } else {
+            softpulldown_copy_rows(dmpi, mpi, 0);
+        }
+    }
+
+    vf->priv->state = state;
+
+    return ret;
+}
+
+static int config(struct vf_instance *vf,
+ int width, int height, int d_width, int d_height,
+ unsigned int flags, unsigned int outfmt)
+{ // nothing to set up locally: all parameters are forwarded unchanged to the next filter
+ return ff_vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
+}
+
+/*
+ * Log the frame counters and release the private state.
+ * Does nothing when the instance has no private state; the pointer is
+ * cleared afterwards so a repeated call is harmless.
+ */
+static void uninit(struct vf_instance *vf)
+{
+    if (!vf->priv)
+        return;
+
+    ff_mp_msg(MSGT_VFILTER, MSGL_INFO, "softpulldown: %lld frames in, %lld frames out\n", vf->priv->in, vf->priv->out);
+    free(vf->priv);
+    vf->priv = NULL;
+}
+
+/*
+ * Set up a softpulldown instance: install the callbacks and allocate the
+ * zero-initialised private state (state 0, both counters 0).
+ *
+ * Returns 1 on success and 0 if the private state cannot be allocated.
+ */
+static int vf_open(vf_instance_t *vf, char *args)
+{
+    vf->config = config;
+    vf->put_image = put_image;
+    vf->uninit = uninit;
+    vf->default_reqs = VFCAP_ACCEPT_STRIDE;
+    vf->priv = calloc(1, sizeof(struct vf_priv_s));
+    if (!vf->priv)
+        return 0;
+    /* calloc() already left state at 0, the required starting value */
+    return 1;
+}
+
+const vf_info_t ff_vf_info_softpulldown = { // filter descriptor; field roles below are presumed from the values - confirm against vf_info_t in vf.h
+ "mpeg2 soft 3:2 pulldown", // descriptive name
+ "softpulldown", // short filter name
+ "Tobias Diedrich <ranma+mplayer@tdiedrich.de>", // author
+ "", // empty free-text field
+ vf_open, // function that sets up an instance
+ NULL // no additional data supplied
+};
diff --git a/libavfilter/libmpcodecs/vf_uspp.c b/libavfilter/libmpcodecs/vf_uspp.c
new file mode 100644
index 0000000..c9d9c1f
--- /dev/null
+++ b/libavfilter/libmpcodecs/vf_uspp.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (C) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <math.h>
+#include <assert.h>
+
+#include "config.h"
+
+#include "mp_msg.h"
+#include "cpudetect.h"
+
+#include "libavutil/mem.h"
+#include "libavcodec/avcodec.h"
+
+#include "img_format.h"
+#include "mp_image.h"
+#include "vf.h"
+#include "av_helpers.h"
+#include "libvo/fastmemcpy.h"
+
+#define XMIN(a,b) ((a) < (b) ? (a) : (b)) // smaller of two values; each argument may be evaluated twice
+
+#define BLOCK 16 // luma border width in pixels and range of the per-pass shifts; chroma uses BLOCK/2
+
+//===========================================================================//
+DECLARE_ALIGNED(8, static const uint8_t, dither)[8][8] = { // 8x8 dither offsets (multiples of 4, 0..252); store_slice_c() adds dither[y&7][pos] before its final >>8
+{ 0*4, 48*4, 12*4, 60*4, 3*4, 51*4, 15*4, 63*4, },
+{ 32*4, 16*4, 44*4, 28*4, 35*4, 19*4, 47*4, 31*4, },
+{ 8*4, 56*4, 4*4, 52*4, 11*4, 59*4, 7*4, 55*4, },
+{ 40*4, 24*4, 36*4, 20*4, 43*4, 27*4, 39*4, 23*4, },
+{ 2*4, 50*4, 14*4, 62*4, 1*4, 49*4, 13*4, 61*4, },
+{ 34*4, 18*4, 46*4, 30*4, 33*4, 17*4, 45*4, 29*4, },
+{ 10*4, 58*4, 6*4, 54*4, 9*4, 57*4, 5*4, 53*4, },
+{ 42*4, 26*4, 38*4, 22*4, 41*4, 25*4, 37*4, 21*4, },
+};
+
+static const uint8_t offset[511][2]= { /*
+ * {x, y} shifts applied to the padded picture before each encode pass.
+ * filter() reads entry [i + count - 1] for pass i of count = 1<<log2_count
+ * passes, so the table holds the lists for count = 1, 2, 4, ... 256 back to
+ * back (1+2+4+...+256 = 511 entries).
+ */
+{ 0, 0},
+{ 0, 0}, { 8, 8},
+{ 0, 0}, { 4, 4}, {12, 8}, { 8,12},
+{ 0, 0}, {10, 2}, { 4, 4}, {14, 6}, { 8, 8}, { 2,10}, {12,12}, { 6,14},
+
+{ 0, 0}, {10, 2}, { 4, 4}, {14, 6}, { 8, 8}, { 2,10}, {12,12}, { 6,14},
+{ 5, 1}, {15, 3}, { 9, 5}, { 3, 7}, {13, 9}, { 7,11}, { 1,13}, {11,15},
+
+{ 0, 0}, { 8, 0}, { 0, 8}, { 8, 8}, { 5, 1}, {13, 1}, { 5, 9}, {13, 9},
+{ 2, 2}, {10, 2}, { 2,10}, {10,10}, { 7, 3}, {15, 3}, { 7,11}, {15,11},
+{ 4, 4}, {12, 4}, { 4,12}, {12,12}, { 1, 5}, { 9, 5}, { 1,13}, { 9,13},
+{ 6, 6}, {14, 6}, { 6,14}, {14,14}, { 3, 7}, {11, 7}, { 3,15}, {11,15},
+
+{ 0, 0}, { 8, 0}, { 0, 8}, { 8, 8}, { 4, 0}, {12, 0}, { 4, 8}, {12, 8},
+{ 1, 1}, { 9, 1}, { 1, 9}, { 9, 9}, { 5, 1}, {13, 1}, { 5, 9}, {13, 9},
+{ 3, 2}, {11, 2}, { 3,10}, {11,10}, { 7, 2}, {15, 2}, { 7,10}, {15,10},
+{ 2, 3}, {10, 3}, { 2,11}, {10,11}, { 6, 3}, {14, 3}, { 6,11}, {14,11},
+{ 0, 4}, { 8, 4}, { 0,12}, { 8,12}, { 4, 4}, {12, 4}, { 4,12}, {12,12},
+{ 1, 5}, { 9, 5}, { 1,13}, { 9,13}, { 5, 5}, {13, 5}, { 5,13}, {13,13},
+{ 3, 6}, {11, 6}, { 3,14}, {11,14}, { 7, 6}, {15, 6}, { 7,14}, {15,14},
+{ 2, 7}, {10, 7}, { 2,15}, {10,15}, { 6, 7}, {14, 7}, { 6,15}, {14,15},
+
+{ 0, 0}, { 8, 0}, { 0, 8}, { 8, 8}, { 0, 2}, { 8, 2}, { 0,10}, { 8,10},
+{ 0, 4}, { 8, 4}, { 0,12}, { 8,12}, { 0, 6}, { 8, 6}, { 0,14}, { 8,14},
+{ 1, 1}, { 9, 1}, { 1, 9}, { 9, 9}, { 1, 3}, { 9, 3}, { 1,11}, { 9,11},
+{ 1, 5}, { 9, 5}, { 1,13}, { 9,13}, { 1, 7}, { 9, 7}, { 1,15}, { 9,15},
+{ 2, 0}, {10, 0}, { 2, 8}, {10, 8}, { 2, 2}, {10, 2}, { 2,10}, {10,10},
+{ 2, 4}, {10, 4}, { 2,12}, {10,12}, { 2, 6}, {10, 6}, { 2,14}, {10,14},
+{ 3, 1}, {11, 1}, { 3, 9}, {11, 9}, { 3, 3}, {11, 3}, { 3,11}, {11,11},
+{ 3, 5}, {11, 5}, { 3,13}, {11,13}, { 3, 7}, {11, 7}, { 3,15}, {11,15},
+{ 4, 0}, {12, 0}, { 4, 8}, {12, 8}, { 4, 2}, {12, 2}, { 4,10}, {12,10},
+{ 4, 4}, {12, 4}, { 4,12}, {12,12}, { 4, 6}, {12, 6}, { 4,14}, {12,14},
+{ 5, 1}, {13, 1}, { 5, 9}, {13, 9}, { 5, 3}, {13, 3}, { 5,11}, {13,11},
+{ 5, 5}, {13, 5}, { 5,13}, {13,13}, { 5, 7}, {13, 7}, { 5,15}, {13,15},
+{ 6, 0}, {14, 0}, { 6, 8}, {14, 8}, { 6, 2}, {14, 2}, { 6,10}, {14,10},
+{ 6, 4}, {14, 4}, { 6,12}, {14,12}, { 6, 6}, {14, 6}, { 6,14}, {14,14},
+{ 7, 1}, {15, 1}, { 7, 9}, {15, 9}, { 7, 3}, {15, 3}, { 7,11}, {15,11},
+{ 7, 5}, {15, 5}, { 7,13}, {15,13}, { 7, 7}, {15, 7}, { 7,15}, {15,15},
+
+{ 0, 0}, { 8, 0}, { 0, 8}, { 8, 8}, { 4, 4}, {12, 4}, { 4,12}, {12,12}, { 0, 4}, { 8, 4}, { 0,12}, { 8,12}, { 4, 0}, {12, 0}, { 4, 8}, {12, 8}, { 2, 2}, {10, 2}, { 2,10}, {10,10}, { 6, 6}, {14, 6}, { 6,14}, {14,14}, { 2, 6}, {10, 6}, { 2,14}, {10,14}, { 6, 2}, {14, 2}, { 6,10}, {14,10}, { 0, 2}, { 8, 2}, { 0,10}, { 8,10}, { 4, 6}, {12, 6}, { 4,14}, {12,14}, { 0, 6}, { 8, 6}, { 0,14}, { 8,14}, { 4, 2}, {12, 2}, { 4,10}, {12,10}, { 2, 0}, {10, 0}, { 2, 8}, {10, 8}, { 6, 4}, {14, 4}, { 6,12}, {14,12}, { 2, 4}, {10, 4}, { 2,12}, {10,12}, { 6, 0}, {14, 0}, { 6, 8}, {14, 8}, { 1, 1}, { 9, 1}, { 1, 9}, { 9, 9}, { 5, 5}, {13, 5}, { 5,13}, {13,13}, { 1, 5}, { 9, 5}, { 1,13}, { 9,13}, { 5, 1}, {13, 1}, { 5, 9}, {13, 9}, { 3, 3}, {11, 3}, { 3,11}, {11,11}, { 7, 7}, {15, 7}, { 7,15}, {15,15}, { 3, 7}, {11, 7}, { 3,15}, {11,15}, { 7, 3}, {15, 3}, { 7,11}, {15,11}, { 1, 3}, { 9, 3}, { 1,11}, { 9,11}, { 5, 7}, {13, 7}, { 5,15}, {13,15}, { 1, 7}, { 9, 7}, { 1,15}, { 9,15}, { 5, 3}, {13, 3}, { 5,11}, {13,11}, { 3, 1}, {11, 1}
+, { 3, 9}, {11, 9}, { 7, 5}, {15, 5}, { 7,13}, {15,13}, { 3, 5}, {11, 5}, { 3,13}, {11,13}, { 7, 1}, {15, 1}, { 7, 9}, {15, 9}, { 0, 1}, { 8, 1}, { 0, 9}, { 8, 9}, { 4, 5}, {12, 5}, { 4,13}, {12,13}, { 0, 5}, { 8, 5}, { 0,13}, { 8,13}, { 4, 1}, {12, 1}, { 4, 9}, {12, 9}, { 2, 3}, {10, 3}, { 2,11}, {10,11}, { 6, 7}, {14, 7}, { 6,15}, {14,15}, { 2, 7}, {10, 7}, { 2,15}, {10,15}, { 6, 3}, {14, 3}, { 6,11}, {14,11}, { 0, 3}, { 8, 3}, { 0,11}, { 8,11}, { 4, 7}, {12, 7}, { 4,15}, {12,15}, { 0, 7}, { 8, 7}, { 0,15}, { 8,15}, { 4, 3}, {12, 3}, { 4,11}, {12,11}, { 2, 1}, {10, 1}, { 2, 9}, {10, 9}, { 6, 5}, {14, 5}, { 6,13}, {14,13}, { 2, 5}, {10, 5}, { 2,13}, {10,13}, { 6, 1}, {14, 1}, { 6, 9}, {14, 9}, { 1, 0}, { 9, 0}, { 1, 8}, { 9, 8}, { 5, 4}, {13, 4}, { 5,12}, {13,12}, { 1, 4}, { 9, 4}, { 1,12}, { 9,12}, { 5, 0}, {13, 0}, { 5, 8}, {13, 8}, { 3, 2}, {11, 2}, { 3,10}, {11,10}, { 7, 6}, {15, 6}, { 7,14}, {15,14}, { 3, 6}, {11, 6}, { 3,14}, {11,14}, { 7, 2}, {15, 2}, { 7,10}, {15,10}, { 1, 2}, { 9, 2}, { 1,10}, { 9,
+10}, { 5, 6}, {13, 6}, { 5,14}, {13,14}, { 1, 6}, { 9, 6}, { 1,14}, { 9,14}, { 5, 2}, {13, 2}, { 5,10}, {13,10}, { 3, 0}, {11, 0}, { 3, 8}, {11, 8}, { 7, 4}, {15, 4}, { 7,12}, {15,12}, { 3, 4}, {11, 4}, { 3,12}, {11,12}, { 7, 0}, {15, 0}, { 7, 8}, {15, 8},
+};
+
+struct vf_priv_s { // per-instance state of the uspp filter
+ int log2_count; // log2 of the number of shifted encode passes per frame (0..8 accepted by vf_open)
+ int qp; // fixed quantizer from the options; 0 means use the frame's qscale table
+ int mode; // third option value; parsed in vf_open but not read elsewhere in this file
+ int mpeg2; // copy of mpi->qscale_type, passed to norm_qscale()
+ int temp_stride[3]; // row stride, in elements, of src[i] and temp[i]
+ uint8_t *src[3]; // padded copies of the input planes with mirrored borders
+ int16_t *temp[3]; // per-plane accumulators summing the output of all passes
+ int outbuf_size; // size in bytes of outbuf
+ uint8_t *outbuf; // output buffer handed to avcodec_encode_video()
+ AVCodecContext *avctx_enc[BLOCK*BLOCK]; // one encoder context per pass; only the first 1<<log2_count are opened by config()
+ AVFrame *frame; // frame whose data pointers are aimed at the shifted src planes
+ AVFrame *frame_dec; // set in filter() to the encoder's coded_frame
+};
+
+/**
+ * Convert accumulated 16 bit samples to 8 bit output pixels.
+ *
+ * Every sample is shifted left by log2_scale, offset by the dither value for
+ * its row and column position, shifted right by 8 and then saturated by
+ * inspecting bit 8 of the result. Pixels are processed in groups of eight,
+ * so a width that is not a multiple of 8 is effectively rounded up.
+ *
+ * @param dst        output plane, rows dst_stride bytes apart
+ * @param src        accumulator plane, rows src_stride elements apart
+ * @param log2_scale left shift applied to each sample before dithering
+ */
+static void store_slice_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale){
+    int row, col, k;
+
+    for(row=0; row<height; row++){
+        const uint8_t *d= dither[row&7];
+        const int16_t *in= src + row*src_stride;
+        uint8_t *out= dst + row*dst_stride;
+
+        for(col=0; col<width; col+=8){
+            for(k=0; k<8; k++){
+                int v= ((in[col + k]<<log2_scale) + d[k])>>8;
+
+                if(v & 0x100)
+                    v= ~(v>>31);
+                out[col + k]= v;
+            }
+        }
+    }
+}
+
+static void filter(struct vf_priv_s *p, uint8_t *dst[3], uint8_t *src[3], int dst_stride[3], int src_stride[3], int width, int height, uint8_t *qp_store, int qp_stride){ /*
+ * Postprocess one frame.
+ *
+ * Each input plane is copied into p->src[i] with a mirrored border (BLOCK
+ * pixels for luma, BLOCK/2 for chroma) and the accumulators p->temp[i] are
+ * cleared. The padded picture is then encoded 1<<log2_count times, each time
+ * shifted by an entry of the offset table, and the encoder's coded_frame is
+ * added to the accumulators with the shift undone. Finally store_slice_c()
+ * scales the sums down and writes them to dst.
+ *
+ * Chroma planes are treated as half width and half height. Planes whose src
+ * or dst pointer is NULL are skipped when copying and storing.
+ */
+ int x, y, i, j;
+ const int count= 1<<p->log2_count; // number of shifted encode passes
+
+ for(i=0; i<3; i++){
+ int is_chroma= !!i;
+ int w= width >>is_chroma;
+ int h= height>>is_chroma;
+ int stride= p->temp_stride[i];
+ int block= BLOCK>>is_chroma; // border width for this plane
+
+ if (!src[i] || !dst[i])
+ continue; // HACK avoid crash for Y8 colourspace
+ for(y=0; y<h; y++){ // copy each row and mirror `block` pixels past both ends
+ int index= block + block*stride + y*stride;
+ fast_memcpy(p->src[i] + index, src[i] + y*src_stride[i], w);
+ for(x=0; x<block; x++){
+ p->src[i][index - x - 1]= p->src[i][index + x ];
+ p->src[i][index + w + x ]= p->src[i][index + w - x - 1];
+ }
+ }
+ for(y=0; y<block; y++){ // mirror `block` full rows above and below the picture
+ fast_memcpy(p->src[i] + ( block-1-y)*stride, p->src[i] + ( y+block )*stride, stride);
+ fast_memcpy(p->src[i] + (h+block +y)*stride, p->src[i] + (h-y+block-1)*stride, stride);
+ }
+
+ p->frame->linesize[i]= stride;
+ memset(p->temp[i], 0, (h+2*block)*stride*sizeof(int16_t)); // reset the accumulators for this frame
+ }
+
+ if(p->qp)
+ p->frame->quality= p->qp * FF_QP2LAMBDA;
+ else
+ p->frame->quality= norm_qscale(qp_store[0], p->mpeg2) * FF_QP2LAMBDA; // only the first qscale entry is used for the whole frame
+// init per MB qscale stuff FIXME
+
+ for(i=0; i<count; i++){
+ const int x1= offset[i+count-1][0]; // shift for this pass, read from the file-scope offset table
+ const int y1= offset[i+count-1][1];
+ int offset; // from here on this local hides the file-scope offset table
+ p->frame->data[0]= p->src[0] + x1 + y1 * p->frame->linesize[0];
+ p->frame->data[1]= p->src[1] + x1/2 + y1/2 * p->frame->linesize[1];
+ p->frame->data[2]= p->src[2] + x1/2 + y1/2 * p->frame->linesize[2];
+
+ avcodec_encode_video(p->avctx_enc[i], p->outbuf, p->outbuf_size, p->frame); // return value is not checked
+ p->frame_dec = p->avctx_enc[i]->coded_frame; // replaces whatever frame_dec pointed to before
+
+ offset= (BLOCK-x1) + (BLOCK-y1)*p->frame_dec->linesize[0]; // undo the shift and skip the border
+ //FIXME optimize
+ for(y=0; y<height; y++){
+ for(x=0; x<width; x++){
+ p->temp[0][ x + y*p->temp_stride[0] ] += p->frame_dec->data[0][ x + y*p->frame_dec->linesize[0] + offset ];
+ }
+ }
+ offset= (BLOCK/2-x1/2) + (BLOCK/2-y1/2)*p->frame_dec->linesize[1]; // plane 1 linesize is also used for plane 2 below
+ for(y=0; y<height/2; y++){
+ for(x=0; x<width/2; x++){
+ p->temp[1][ x + y*p->temp_stride[1] ] += p->frame_dec->data[1][ x + y*p->frame_dec->linesize[1] + offset ];
+ p->temp[2][ x + y*p->temp_stride[2] ] += p->frame_dec->data[2][ x + y*p->frame_dec->linesize[2] + offset ];
+ }
+ }
+ }
+
+ for(j=0; j<3; j++){
+ int is_chroma= !!j;
+ if (!dst[j])
+ continue; // HACK avoid crash for Y8 colourspace
+ store_slice_c(dst[j], p->temp[j], dst_stride[j], p->temp_stride[j], width>>is_chroma, height>>is_chroma, 8-p->log2_count); // the scale shrinks as the number of summed passes grows
+ }
+}
+
+/**
+ * Allocate the per-plane work buffers, open one Snow encoder context per
+ * shifted pass (1<<log2_count of them) and configure the next filter.
+ *
+ * Buffers and contexts that were set up before a failure stay attached to
+ * vf->priv and are left for uninit() to release.
+ *
+ * @return 0 if the encoder is unavailable, an allocation fails or an encoder
+ *         cannot be opened; otherwise the result of ff_vf_next_config()
+ */
+static int config(struct vf_instance *vf,
+                  int width, int height, int d_width, int d_height,
+                  unsigned int flags, unsigned int outfmt){
+    int i;
+    AVCodec *enc= avcodec_find_encoder(AV_CODEC_ID_SNOW);
+
+    if(!enc)
+        return 0;
+
+    for(i=0; i<3; i++){
+        int is_chroma= !!i;
+        int w= ((width  + 4*BLOCK-1) & (~(2*BLOCK-1)))>>is_chroma;
+        int h= ((height + 4*BLOCK-1) & (~(2*BLOCK-1)))>>is_chroma;
+
+        vf->priv->temp_stride[i]= w;
+        vf->priv->temp[i]= malloc(vf->priv->temp_stride[i]*h*sizeof(int16_t));
+        vf->priv->src [i]= malloc(vf->priv->temp_stride[i]*h*sizeof(uint8_t));
+        if(!vf->priv->temp[i] || !vf->priv->src[i])
+            return 0;
+    }
+    for(i=0; i< (1<<vf->priv->log2_count); i++){
+        AVCodecContext *avctx_enc;
+        AVDictionary *opts = NULL;
+        int err;
+
+        avctx_enc=
+        vf->priv->avctx_enc[i]= avcodec_alloc_context3(NULL);
+        if(!avctx_enc)
+            return 0;
+        avctx_enc->width = width + BLOCK;
+        avctx_enc->height = height + BLOCK;
+        avctx_enc->time_base= (AVRational){1,25};  // meaningless
+        avctx_enc->gop_size = 300;
+        avctx_enc->max_b_frames= 0;
+        avctx_enc->pix_fmt = AV_PIX_FMT_YUV420P;
+        avctx_enc->flags = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
+        avctx_enc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
+        avctx_enc->global_quality= 123;
+        av_dict_set(&opts, "no_bitstream", "1", 0);
+        err= avcodec_open2(avctx_enc, enc, &opts);
+        // free the options on the failure path too, not only on success
+        av_dict_free(&opts);
+        if(err < 0)
+            return 0;
+        assert(avctx_enc->codec);
+    }
+    vf->priv->frame= av_frame_alloc();
+    if(!vf->priv->frame)
+        return 0;
+    // frame_dec is deliberately not allocated: filter() always points it at
+    // the encoder's coded_frame before reading it
+
+    vf->priv->outbuf_size= (width + BLOCK)*(height + BLOCK)*10;
+    vf->priv->outbuf= malloc(vf->priv->outbuf_size);
+    if(!vf->priv->outbuf)
+        return 0;
+
+    return ff_vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
+}
+
+/**
+ * Direct-rendering hook: obtain an image from the next filter and let mpi
+ * use its planes and strides, unless the caller requires its own buffer to
+ * stay untouched.
+ */
+static void get_image(struct vf_instance *vf, mp_image_t *mpi){
+    mp_image_t *shared;
+    int plane;
+
+    if(mpi->flags&MP_IMGFLAG_PRESERVE)
+        return; // don't change
+
+    // ok, we can do pp in-place (or pp disabled):
+    shared= ff_vf_get_image(vf->next, mpi->imgfmt, mpi->type,
+                            mpi->flags | MP_IMGFLAG_READABLE,
+                            mpi->width, mpi->height);
+    vf->dmpi= shared;
+
+    mpi->planes[0]= shared->planes[0];
+    mpi->stride[0]= shared->stride[0];
+    mpi->width    = shared->width;
+    if(mpi->flags&MP_IMGFLAG_PLANAR){
+        for(plane=1; plane<=2; plane++){
+            mpi->planes[plane]= shared->planes[plane];
+            mpi->stride[plane]= shared->stride[plane];
+        }
+    }
+    mpi->flags|= MP_IMGFLAG_DIRECT;
+}
+
+/**
+ * Process one frame and pass it on.
+ *
+ * The picture is touched only when at least one pass is configured or the
+ * frame was not rendered directly into the output image. In that case it is
+ * filtered if a quantizer is available (qscale table or fixed qp) and copied
+ * plane by plane otherwise.
+ */
+static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts){
+    struct vf_priv_s *priv= vf->priv;
+    mp_image_t *dmpi;
+
+    if(mpi->flags&MP_IMGFLAG_DIRECT){
+        dmpi=vf->dmpi;
+    }else{
+        // no DR, so get a new image! hope we'll get DR buffer:
+        dmpi=ff_vf_get_image(vf->next,mpi->imgfmt,
+                             MP_IMGTYPE_TEMP,
+                             MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE,
+                             mpi->width,mpi->height);
+        ff_vf_clone_mpi_attributes(dmpi, mpi);
+    }
+
+    priv->mpeg2= mpi->qscale_type;
+    if(priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){
+        if(!mpi->qscale && !priv->qp){
+            const int chroma_w= mpi->w>>mpi->chroma_x_shift;
+            const int chroma_h= mpi->h>>mpi->chroma_y_shift;
+            int plane;
+
+            for(plane=0; plane<3; plane++){
+                const int pw= plane ? chroma_w : mpi->w;
+                const int ph= plane ? chroma_h : mpi->h;
+
+                memcpy_pic(dmpi->planes[plane], mpi->planes[plane], pw, ph, dmpi->stride[plane], mpi->stride[plane]);
+            }
+        }else{
+            filter(priv, dmpi->planes, mpi->planes, dmpi->stride, mpi->stride, mpi->w, mpi->h, mpi->qscale, mpi->qstride);
+        }
+    }
+
+#if HAVE_MMX
+    if(ff_gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
+#endif
+#if HAVE_MMX2
+    if(ff_gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
+#endif
+
+    return ff_vf_next_put_image(vf,dmpi, pts);
+}
+
+/**
+ * Release everything config() and vf_open() attached to the instance: the
+ * per-plane buffers, the encoder contexts (closed before being freed), the
+ * input frame, the encoder output buffer and the private state itself.
+ * Does nothing when the instance has no private state.
+ */
+static void uninit(struct vf_instance *vf){
+    struct vf_priv_s *priv= vf->priv;
+    int i;
+
+    if(!priv) return;
+
+    for(i=0; i<3; i++){
+        free(priv->temp[i]);
+        priv->temp[i]= NULL;
+        free(priv->src[i]);
+        priv->src[i]= NULL;
+    }
+    for(i=0; i<BLOCK*BLOCK; i++){
+        // close first so the codec's own allocations are released too
+        if(priv->avctx_enc[i])
+            avcodec_close(priv->avctx_enc[i]);
+        av_freep(&priv->avctx_enc[i]);
+    }
+    // after filter() has run, frame_dec refers to an encoder's coded_frame,
+    // so it is only cleared here, never freed
+    priv->frame_dec= NULL;
+    av_frame_free(&priv->frame);
+
+    free(priv->outbuf);
+    priv->outbuf= NULL;
+
+    free(priv);
+    vf->priv=NULL;
+}
+
+//===========================================================================//
+/**
+ * Report support for an image format: the formats listed below are passed on
+ * to the next filter for its verdict, everything else is rejected with 0.
+ */
+static int query_format(struct vf_instance *vf, unsigned int fmt){
+    static const unsigned int accepted[]= {
+        IMGFMT_YV12, IMGFMT_I420, IMGFMT_IYUV, IMGFMT_Y800, IMGFMT_Y8,
+    };
+    unsigned int k;
+
+    for(k=0; k<sizeof(accepted)/sizeof(accepted[0]); k++){
+        if(fmt == accepted[k])
+            return ff_vf_next_query_format(vf,fmt);
+    }
+    return 0;
+}
+
+/**
+ * Handle the postprocessing-level requests and forward all others to the
+ * next filter.
+ *
+ * VFCTRL_QUERY_MAX_PP_LEVEL returns the highest supported level.
+ * VFCTRL_SET_PP_LEVEL stores the requested level as log2_count; values above
+ * the maximum are clamped, because a larger value would make later passes
+ * index beyond avctx_enc[] and the offset table.
+ */
+static int control(struct vf_instance *vf, int request, void* data){
+    enum { MAX_PP_LEVEL = 8 };
+
+    switch(request){
+    case VFCTRL_QUERY_MAX_PP_LEVEL:
+        return MAX_PP_LEVEL;
+    case VFCTRL_SET_PP_LEVEL: {
+        unsigned int level= *((unsigned int*)data);
+
+        if(level > MAX_PP_LEVEL)
+            level= MAX_PP_LEVEL;
+        vf->priv->log2_count= level;
+        //FIXME we have to realloc a few things here
+        return CONTROL_TRUE;
+    }
+    }
+    return ff_vf_next_control(vf,request,data);
+}
+
+/**
+ * Set up a uspp filter instance.
+ *
+ * Installs the callbacks, allocates zeroed private state and parses the
+ * optional "log2_count:qp:mode" argument string. log2_count defaults to 4
+ * and is only overridden by a value in 0..8; a negative qp is reset to 0.
+ *
+ * @return 1 on success, 0 if the private state cannot be allocated
+ */
+static int vf_open(vf_instance_t *vf, char *args){
+
+    int log2c=-1;
+
+    vf->config=config;
+    vf->put_image=put_image;
+    vf->get_image=get_image;
+    vf->query_format=query_format;
+    vf->uninit=uninit;
+    vf->control= control;
+    // calloc() replaces malloc()+memset(), and the result is checked before use
+    vf->priv=calloc(1, sizeof(struct vf_priv_s));
+    if(!vf->priv)
+        return 0;
+
+    ff_init_avcodec();
+
+    vf->priv->log2_count= 4;
+
+    if (args) sscanf(args, "%d:%d:%d", &log2c, &vf->priv->qp, &vf->priv->mode);
+
+    if( log2c >=0 && log2c <=8 )
+        vf->priv->log2_count = log2c;
+
+    if(vf->priv->qp < 0)
+        vf->priv->qp = 0;
+
+// #if HAVE_MMX
+//     if(ff_gCpuCaps.hasMMX){
+//         store_slice= store_slice_mmx;
+//     }
+// #endif
+
+    return 1;
+}
+
+const vf_info_t ff_vf_info_uspp = { // filter descriptor; field roles below are presumed from the values - confirm against vf_info_t in vf.h
+ "ultra simple/slow postprocess", // descriptive name
+ "uspp", // short filter name
+ "Michael Niedermayer", // author
+ "", // empty free-text field
+ vf_open, // function that sets up an instance
+ NULL // no additional data supplied
+};
diff --git a/libavfilter/libmpcodecs/vfcap.h b/libavfilter/libmpcodecs/vfcap.h
new file mode 100644
index 0000000..611d642
--- /dev/null
+++ b/libavfilter/libmpcodecs/vfcap.h
@@ -0,0 +1,56 @@
+/* VFCAP_* values: they are flags, returned by query_format():
+ *
+ * This file is part of MPlayer.
+ *
+ * MPlayer is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * MPlayer is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with MPlayer; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPLAYER_VFCAP_H
+#define MPLAYER_VFCAP_H
+
+// set if the given colorspace is supported (with or without conversion)
+#define VFCAP_CSP_SUPPORTED 0x1
+// set if the given colorspace is supported _without_ conversion
+#define VFCAP_CSP_SUPPORTED_BY_HW 0x2
+// set if the driver/filter can draw the OSD
+#define VFCAP_OSD 0x4
+// set if the driver/filter can handle a compressed SPU stream
+#define VFCAP_SPU 0x8
+// scaling up/down in hardware, or scaling in software:
+#define VFCAP_HWSCALE_UP 0x10
+#define VFCAP_HWSCALE_DOWN 0x20
+#define VFCAP_SWSCALE 0x40
+// driver/filter can flip the image vertically (upside-down)
+#define VFCAP_FLIP 0x80
+
+// driver/hardware handles timing (blocking)
+#define VFCAP_TIMER 0x100
+// driver _always_ flips the image upside-down (for ve_vfw)
+#define VFCAP_FLIPPED 0x200
+// vf filter: accepts a stride in put_image()
+// vo driver: has draw_slice() support for the given colorspace
+#define VFCAP_ACCEPT_STRIDE 0x400
+// filter does postprocessing (so the image should not be scaled/filtered before it)
+#define VFCAP_POSTPROC 0x800
+// filter cannot be reconfigured to a different size or format
+#define VFCAP_CONSTANT 0x1000
+// filter can draw EOSD
+#define VFCAP_EOSD 0x2000
+// filter draws EOSD at screen resolution (without scaling)
+#define VFCAP_EOSD_UNSCALED 0x4000
+// used by libvo and vf_vo: the VO does not support draw_slice() for this format
+#define VOCAP_NOSLICES 0x8000
+
+#endif /* MPLAYER_VFCAP_H */
OpenPOWER on IntegriCloud