Diffstat (limited to 'libswscale')
-rw-r--r--  libswscale/Makefile                         |    8
-rw-r--r--  libswscale/arm/Makefile                     |    4
-rw-r--r--  libswscale/arm/rgb2yuv_neon_16.S            |   80
-rw-r--r--  libswscale/arm/rgb2yuv_neon_32.S            |  119
-rw-r--r--  libswscale/arm/rgb2yuv_neon_common.S        |  291
-rw-r--r--  libswscale/arm/swscale_unscaled.c           |   79
-rw-r--r--  libswscale/bayer_template.c                 |  236
-rw-r--r--  libswscale/colorspace-test.c                |   18
-rw-r--r--  libswscale/hscale_fast_bilinear.c           |   55
-rw-r--r--  libswscale/input.c                          |  666
-rw-r--r--  libswscale/options.c                        |   31
-rw-r--r--  libswscale/output.c                         | 1027
-rw-r--r--  libswscale/ppc/swscale_altivec.c            |   10
-rw-r--r--  libswscale/ppc/yuv2rgb_altivec.c            |   26
-rw-r--r--  libswscale/ppc/yuv2rgb_altivec.h            |    8
-rw-r--r--  libswscale/ppc/yuv2yuv_altivec.c            |    8
-rw-r--r--  libswscale/rgb2rgb.c                        |  120
-rw-r--r--  libswscale/rgb2rgb.h                        |   28
-rw-r--r--  libswscale/rgb2rgb_template.c               |  105
-rw-r--r--  libswscale/swscale-test.c                   |   70
-rw-r--r--  libswscale/swscale.c                        |  651
-rw-r--r--  libswscale/swscale.h                        |   22
-rw-r--r--  libswscale/swscale_internal.h               |  264
-rw-r--r--  libswscale/swscale_unscaled.c               | 1355
-rw-r--r--  libswscale/swscaleres.rc                    |   55
-rw-r--r--  libswscale/utils.c                          |  939
-rw-r--r--  libswscale/version.h                        |   15
-rw-r--r--  libswscale/x86/Makefile                     |    4
-rw-r--r--  libswscale/x86/hscale_fast_bilinear_simd.c  |  374
-rw-r--r--  libswscale/x86/input.asm                    |  252
-rw-r--r--  libswscale/x86/output.asm                   |   12
-rw-r--r--  libswscale/x86/rgb2rgb.c                    |   17
-rw-r--r--  libswscale/x86/rgb2rgb_template.c           |  215
-rw-r--r--  libswscale/x86/scale.asm                    |   14
-rw-r--r--  libswscale/x86/swscale.c                    |  141
-rw-r--r--  libswscale/x86/swscale_template.c           |  420
-rw-r--r--  libswscale/x86/w64xmmtest.c                 |    8
-rw-r--r--  libswscale/x86/yuv2rgb.c                    |   25
-rw-r--r--  libswscale/x86/yuv2rgb_template.c           |   46
-rw-r--r--  libswscale/yuv2rgb.c                        |  125
40 files changed, 5729 insertions, 2214 deletions
diff --git a/libswscale/Makefile b/libswscale/Makefile
index 3e8614d..067e2b9 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -1,9 +1,12 @@
+include $(SUBDIR)../config.mak
+
NAME = swscale
HEADERS = swscale.h \
version.h \
-OBJS = input.o \
+OBJS = hscale_fast_bilinear.o \
+ input.o \
options.o \
output.o \
rgb2rgb.o \
@@ -12,5 +15,8 @@ OBJS = input.o \
utils.o \
yuv2rgb.o \
+# Windows resource file
+SLIBOBJS-$(HAVE_GNU_WINDRES) += swscaleres.o
+
TESTPROGS = colorspace \
swscale \
diff --git a/libswscale/arm/Makefile b/libswscale/arm/Makefile
new file mode 100644
index 0000000..8b5a97b
--- /dev/null
+++ b/libswscale/arm/Makefile
@@ -0,0 +1,4 @@
+# OBJS += arm/swscale_unscaled.o
+
+# NEON-OBJS += arm/rgb2yuv_neon_32.o
+# NEON-OBJS += arm/rgb2yuv_neon_16.o
diff --git a/libswscale/arm/rgb2yuv_neon_16.S b/libswscale/arm/rgb2yuv_neon_16.S
new file mode 100644
index 0000000..601bc9a
--- /dev/null
+++ b/libswscale/arm/rgb2yuv_neon_16.S
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "rgb2yuv_neon_common.S"
+
+/* downsampled R16G16B16 x8 */
+alias_qw r16x8, q7
+alias_qw g16x8, q8
+alias_qw b16x8, q9
+
+alias n16x16_l, q11
+alias n16x16_h, q12
+
+alias y16x16_l, q13
+alias y16x16_h, q14
+
+alias_qw y8x16, q15
+
+.macro init src
+ vld3.i32 {q13_l, q14_l, q15_l}, [\src]!
+ vld3.i32 {q13_h[0], q14_h[0], q15_h[0]}, [\src]
+ vrshrn.i32 CO_R, q13, #7
+ vrshrn.i32 CO_G, q14, #7
+ vrshrn.i32 CO_B, q15, #7
+
+ vmov.u8 BIAS_Y, #16
+ vmov.u8 BIAS_U, #128
+.endm
+
+
+.macro compute_y_16x1_step action, s8x16, coeff
+ vmovl.u8 n16x16_l, \s8x16\()_l
+ vmovl.u8 n16x16_h, \s8x16\()_h
+
+ \action y16x16_l, n16x16_l, \coeff
+ \action y16x16_h, n16x16_h, \coeff
+.endm
+
+.macro compute_y_16x1
+ compute_y_16x1_step vmul, r8x16, CO_RY
+ compute_y_16x1_step vmla, g8x16, CO_GY
+ compute_y_16x1_step vmla, b8x16, CO_BY
+
+ vrshrn.i16 y8x16_l, y16x16_l, #8
+ vrshrn.i16 y8x16_h, y16x16_h, #8
+
+ vadd.u8 y8x16, y8x16, BIAS_Y
+.endm
+
+alias c16x8, q15
+alias_qw c8x8x2, q10
+
+
+.macro compute_chroma_8x1 c, C
+ vmul c16x8, r16x8, CO_R\C
+ vmla c16x8, g16x8, CO_G\C
+ vmla c16x8, b16x8, CO_B\C
+
+ vrshrn.i16 \c\()8x8, c16x8, #8
+ vadd.u8 \c\()8x8, \c\()8x8, BIAS_\C
+.endm
+
+ loop_420sp rgbx, nv12, init, kernel_420_16x2, 16
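
The _16 variant above trades precision for speed: init pre-narrows each Q15 coefficient with vrshrn.i32 #7 so the per-pixel multiply-accumulate fits in 16 bits, and a single vrshrn.i16 #8 recovers the luma term; the _32 variant below keeps the full 15-bit coefficients and widens to 32 bits instead. A quick sanity check using the BT.601 constants this same patch deletes from input.c (a hand-worked sketch, not program output):

/* RY = 8414, GY = 16519, BY = 3208 in Q15 (see the removed macros in input.c).
 * vrshrn.i32 #7:  (8414 + 64) >> 7 = 66,  (16519 + 64) >> 7 = 129,
 *                 (3208 + 64) >> 7 = 25
 * worst case:     255 * (66 + 129 + 25) = 56100, fits in a uint16_t
 * vrshrn.i16 #8:  (56100 + 128) >> 8 = 219, the expected luma excursion
 *                 for full-range white before the +16 bias is added.
 */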
diff --git a/libswscale/arm/rgb2yuv_neon_32.S b/libswscale/arm/rgb2yuv_neon_32.S
new file mode 100644
index 0000000..f51a5f1
--- /dev/null
+++ b/libswscale/arm/rgb2yuv_neon_32.S
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "rgb2yuv_neon_common.S"
+
+/* downsampled R16G16B16 x8 */
+alias_qw r16x8, q7
+alias_qw g16x8, q8
+alias_qw b16x8, q9
+
+alias n16x16_o, q11
+alias n16x16_ol, q11_l
+alias n16x16_oh, q11_h
+
+alias y32x16_el, q12
+alias y32x16_eh, q13
+alias y32x16_ol, q14
+alias y32x16_oh, q15
+
+alias y16x16_e, q12
+alias y16x16_el, q12_l
+alias y16x16_eh, q12_h
+alias y16x16_o, q13
+alias y16x16_ol, q13_l
+alias y16x16_oh, q13_h
+
+
+alias y8x16, y16x16_e
+
+
+.macro init src
+ // load s32x3x3, narrow to s16x3x3
+ vld3.i32 {q13_l, q14_l, q15_l}, [\src]!
+ vld3.i32 {q13_h[0], q14_h[0], q15_h[0]}, [\src]
+
+ vmovn.i32 CO_R, q13
+ vmovn.i32 CO_G, q14
+ vmovn.i32 CO_B, q15
+
+ vmov.u8 BIAS_Y, #16
+ vmov.u8 BIAS_U, #128
+.endm
+
+
+.macro compute_y_16x1_step action, s8x16, coeff
+ vmov.u8 n16x16_o, #0
+ vtrn.u8 \s8x16, n16x16_o
+
+ \action y32x16_el, \s8x16\()_l, \coeff
+ \action y32x16_eh, \s8x16\()_h, \coeff
+ \action y32x16_ol, n16x16_ol, \coeff
+ \action y32x16_oh, n16x16_oh, \coeff
+.endm
+
+/*
+ * in: r8x16, g8x16, b8x16
+ * out: y8x16
+ * clobber: q11-q15, r8x16, g8x16, b8x16
+ */
+.macro compute_y_16x1
+ compute_y_16x1_step vmull, r8x16, CO_RY
+ compute_y_16x1_step vmlal, g8x16, CO_GY
+ compute_y_16x1_step vmlal, b8x16, CO_BY
+
+ vrshrn.i32 y16x16_el, y32x16_el, #15
+ vrshrn.i32 y16x16_eh, y32x16_eh, #15
+ vrshrn.i32 y16x16_ol, y32x16_ol, #15
+ vrshrn.i32 y16x16_oh, y32x16_oh, #15
+
+ vtrn.8 y16x16_e, y16x16_o
+ vadd.u8 y8x16, y8x16, BIAS_Y
+.endm
+
+alias c32x8_l, q14
+alias c32x8_h, q15
+
+alias_qw c16x8, q13
+alias_qw c8x8x2, q10
+
+.macro compute_chroma_8x1_step action, s16x8, coeff
+ \action c32x8_l, \s16x8\()_l, \coeff
+ \action c32x8_h, \s16x8\()_h, \coeff
+.endm
+
+/*
+ * in: r16x8, g16x8, b16x8
+ * out: c8x8
+ * clobber: q14-q15
+ */
+.macro compute_chroma_8x1 c, C
+ compute_chroma_8x1_step vmull, r16x8, CO_R\C
+ compute_chroma_8x1_step vmlal, g16x8, CO_G\C
+ compute_chroma_8x1_step vmlal, b16x8, CO_B\C
+
+ vrshrn.i32 c16x8_l, c32x8_l, #15
+ vrshrn.i32 c16x8_h, c32x8_h, #15
+ vmovn.i16 \c\()8x8, c16x8
+ vadd.u8 \c\()8x8, \c\()8x8, BIAS_\C
+.endm
+
+
+ loop_420sp rgbx, nv12, init, kernel_420_16x2, 32
diff --git a/libswscale/arm/rgb2yuv_neon_common.S b/libswscale/arm/rgb2yuv_neon_common.S
new file mode 100644
index 0000000..30bcecd
--- /dev/null
+++ b/libswscale/arm/rgb2yuv_neon_common.S
@@ -0,0 +1,291 @@
+/*
+ * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+.macro alias name, tgt, set=1
+.if \set != 0
+ \name .req \tgt
+.else
+ .unreq \name
+.endif
+.endm
+
+.altmacro
+
+.macro alias_dw_all qw, dw_l, dw_h
+ alias q\qw\()_l, d\dw_l
+ alias q\qw\()_h, d\dw_h
+ .if \qw < 15
+ alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
+ .endif
+.endm
+
+alias_dw_all 0, 0, 1
+
+.noaltmacro
+
+.macro alias_qw name, qw, set=1
+ alias \name\(), \qw, \set
+ alias \name\()_l, \qw\()_l, \set
+ alias \name\()_h, \qw\()_h, \set
+.endm
+
+.macro prologue
+ push {r4-r12, lr}
+ vpush {q4-q7}
+.endm
+
+.macro epilogue
+ vpop {q4-q7}
+ pop {r4-r12, pc}
+.endm
+
+.macro load_arg reg, ix
+ ldr \reg, [sp, #((10 * 4 + 4 * 16) + (\ix - 4) * 4)]
+.endm
+
+
+/* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
+ * int width, int height,
+ * int y_stride, int c_stride, int src_stride,
+ * int32_t coeff_table[9]);
+ */
+.macro alias_loop_420sp set=1
+ alias src, r0, \set
+ alias src0, src, \set
+ alias y, r1, \set
+ alias y0, y, \set
+ alias chroma, r2, \set
+ alias width, r3, \set
+ alias header, width, \set
+
+ alias height, r4, \set
+ alias y_stride, r5, \set
+ alias c_stride, r6, \set
+ alias c_padding, c_stride, \set
+ alias src_stride, r7, \set
+
+ alias y0_end, r8, \set
+
+ alias src_padding,r9, \set
+ alias y_padding, r10, \set
+
+ alias src1, r11, \set
+ alias y1, r12, \set
+
+ alias coeff_table,r12, \set
+.endm
+
+
+.macro loop_420sp s_fmt, d_fmt, init, kernel, precision
+
+function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
+ prologue
+
+ alias_loop_420sp
+
+ load_arg height, 4
+ load_arg y_stride, 5
+ load_arg c_stride, 6
+ load_arg src_stride, 7
+ load_arg coeff_table, 8
+
+ \init coeff_table
+
+ sub y_padding, y_stride, width
+ sub c_padding, c_stride, width
+ sub src_padding, src_stride, width, LSL #2
+
+ add y0_end, y0, width
+ and header, width, #15
+
+ add y1, y0, y_stride
+ add src1, src0, src_stride
+
+0:
+ cmp header, #0
+ beq 1f
+
+ \kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header
+
+1:
+ \kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma
+
+ cmp y0, y0_end
+ blt 1b
+2:
+ add y0, y1, y_padding
+ add y0_end, y1, y_stride
+ add chroma, chroma, c_padding
+ add src0, src1, src_padding
+
+ add y1, y0, y_stride
+ add src1, src0, src_stride
+
+ subs height, height, #2
+
+ bgt 0b
+
+ epilogue
+
+ alias_loop_420sp 0
+
+endfunc
+.endm
+
+.macro downsample
+ vpaddl.u8 r16x8, r8x16
+ vpaddl.u8 g16x8, g8x16
+ vpaddl.u8 b16x8, b8x16
+.endm
+
+
+/* accumulate and right shift by 2 */
+.macro downsample_ars2
+ vpadal.u8 r16x8, r8x16
+ vpadal.u8 g16x8, g8x16
+ vpadal.u8 b16x8, b8x16
+
+ vrshr.u16 r16x8, r16x8, #2
+ vrshr.u16 g16x8, g16x8, #2
+ vrshr.u16 b16x8, b16x8, #2
+.endm
+
+.macro store_y8_16x1 dst, count
+.ifc "\count",""
+ vstmia \dst!, {y8x16}
+.else
+ vstmia \dst, {y8x16}
+ add \dst, \dst, \count
+.endif
+.endm
+
+.macro store_chroma_nv12_8x1 dst, count
+.ifc "\count",""
+ vst2.i8 {u8x8, v8x8}, [\dst]!
+.else
+ vst2.i8 {u8x8, v8x8}, [\dst], \count
+.endif
+.endm
+
+.macro store_chroma_nv21_8x1 dst, count
+.ifc "\count",""
+ vst2.i8 {v8x8, u8x8}, [\dst]!
+.else
+ vst2.i8 {v8x8, u8x8}, [\dst], \count
+.endif
+.endm
+
+.macro load_8888_16x1 a, b, c, d, src, count
+.ifc "\count",""
+ vld4.8 {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
+ vld4.8 {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
+.else
+ vld4.8 {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
+ vld4.8 {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
+ sub \src, \src, #32
+ add \src, \src, \count, LSL #2
+.endif
+.endm
+
+.macro load_rgbx_16x1 src, count
+ load_8888_16x1 r, g, b, x, \src, \count
+.endm
+
+.macro load_bgrx_16x1 src, count
+ load_8888_16x1 b, g, r, x, \src, \count
+.endm
+
+.macro alias_src_rgbx set=1
+ alias_src_8888 r, g, b, x, \set
+.endm
+
+.macro alias_src_bgrx set=1
+ alias_src_8888 b, g, r, x, \set
+.endm
+
+.macro alias_dst_nv12 set=1
+ alias u8x8, c8x8x2_l, \set
+ alias v8x8, c8x8x2_h, \set
+.endm
+
+.macro alias_dst_nv21 set=1
+ alias v8x8, c8x8x2_l, \set
+ alias u8x8, c8x8x2_h, \set
+.endm
+
+
+// common aliases
+
+alias CO_R d0
+CO_RY .dn d0.s16[0]
+CO_RU .dn d0.s16[1]
+CO_RV .dn d0.s16[2]
+
+alias CO_G d1
+CO_GY .dn d1.s16[0]
+CO_GU .dn d1.s16[1]
+CO_GV .dn d1.s16[2]
+
+alias CO_B d2
+CO_BY .dn d2.s16[0]
+CO_BU .dn d2.s16[1]
+CO_BV .dn d2.s16[2]
+
+alias BIAS_U, d3
+alias BIAS_V, BIAS_U
+
+alias BIAS_Y, q2
+
+
+/* q3-q6 R8G8B8X8 x16 */
+
+.macro alias_src_8888 a, b, c, d, set
+ alias_qw \a\()8x16, q3, \set
+ alias_qw \b\()8x16, q4, \set
+ alias_qw \c\()8x16, q5, \set
+ alias_qw \d\()8x16, q6, \set
+.endm
+
+.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
+ alias_src_\rgb_fmt
+ alias_dst_\yuv_fmt
+
+ load_\rgb_fmt\()_16x1 \rgb0, \count
+
+ downsample
+ compute_y_16x1
+ store_y8_16x1 \y0, \count
+
+
+ load_\rgb_fmt\()_16x1 \rgb1, \count
+ downsample_ars2
+ compute_y_16x1
+ store_y8_16x1 \y1, \count
+
+ compute_chroma_8x1 u, U
+ compute_chroma_8x1 v, V
+
+ store_chroma_\yuv_fmt\()_8x1 \chroma, \count
+
+ alias_dst_\yuv_fmt 0
+ alias_src_\rgb_fmt 0
+.endm
diff --git a/libswscale/arm/swscale_unscaled.c b/libswscale/arm/swscale_unscaled.c
new file mode 100644
index 0000000..04be762
--- /dev/null
+++ b/libswscale/arm/swscale_unscaled.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
+#include "libavutil/arm/cpu.h"
+
+extern void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma,
+ int width, int height,
+ int y_stride, int c_stride, int src_stride,
+ int32_t coeff_tbl[9]);
+
+extern void rgbx_to_nv12_neon_16(const uint8_t *src, uint8_t *y, uint8_t *chroma,
+ int width, int height,
+ int y_stride, int c_stride, int src_stride,
+ int32_t coeff_tbl[9]);
+
+static int rgbx_to_nv12_neon_32_wrapper(SwsContext *context, const uint8_t *src[],
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[]) {
+
+ rgbx_to_nv12_neon_32(src[0] + srcSliceY * srcStride[0],
+ dst[0] + srcSliceY * dstStride[0],
+ dst[1] + (srcSliceY / 2) * dstStride[1],
+ context->srcW, srcSliceH,
+ dstStride[0], dstStride[1], srcStride[0],
+ context->input_rgb2yuv_table);
+
+ return 0;
+}
+
+static int rgbx_to_nv12_neon_16_wrapper(SwsContext *context, const uint8_t *src[],
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[]) {
+
+ rgbx_to_nv12_neon_16(src[0] + srcSliceY * srcStride[0],
+ dst[0] + srcSliceY * dstStride[0],
+ dst[1] + (srcSliceY / 2) * dstStride[1],
+ context->srcW, srcSliceH,
+ dstStride[0], dstStride[1], srcStride[0],
+ context->input_rgb2yuv_table);
+
+ return 0;
+}
+
+static void get_unscaled_swscale_neon(SwsContext *c) {
+ int accurate_rnd = c->flags & SWS_ACCURATE_RND;
+ if (c->srcFormat == AV_PIX_FMT_RGBA
+ && c->dstFormat == AV_PIX_FMT_NV12
+ && (c->srcW >= 16)) {
+ c->swscale = accurate_rnd ? rgbx_to_nv12_neon_32_wrapper
+ : rgbx_to_nv12_neon_16_wrapper;
+ }
+}
+
+void ff_get_unscaled_swscale_arm(SwsContext *c)
+{
+ int cpu_flags = av_get_cpu_flags();
+ if (have_neon(cpu_flags))
+ get_unscaled_swscale_neon(c);
+}
diff --git a/libswscale/bayer_template.c b/libswscale/bayer_template.c
new file mode 100644
index 0000000..67ab2ae
--- /dev/null
+++ b/libswscale/bayer_template.c
@@ -0,0 +1,236 @@
+/*
+ * Bayer-to-RGB/YV12 template
+ * Copyright (c) 2011-2014 Peter Ross <pross@xvid.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if defined(BAYER_BGGR) || defined(BAYER_GBRG)
+#define BAYER_R 0
+#define BAYER_G 1
+#define BAYER_B 2
+#endif
+#if defined(BAYER_RGGB) || defined(BAYER_GRBG)
+#define BAYER_R 2
+#define BAYER_G 1
+#define BAYER_B 0
+#endif
+
+#if defined(BAYER_8)
+#define BAYER_READ(x) (x)
+#define BAYER_SIZEOF 1
+#define BAYER_SHIFT 0
+#endif
+#if defined(BAYER_16LE)
+#define BAYER_READ(x) AV_RL16(&x)
+#define BAYER_SIZEOF 2
+#define BAYER_SHIFT 8
+#endif
+#if defined(BAYER_16BE)
+#define BAYER_READ(x) AV_RB16(&x)
+#define BAYER_SIZEOF 2
+#define BAYER_SHIFT 8
+#endif
+
+#define S(y, x) BAYER_READ(src[(y)*src_stride + BAYER_SIZEOF*(x)])
+#define T(y, x) (unsigned int)S(y, x)
+#define R(y, x) dst[(y)*dst_stride + (x)*3 + BAYER_R]
+#define G(y, x) dst[(y)*dst_stride + (x)*3 + BAYER_G]
+#define B(y, x) dst[(y)*dst_stride + (x)*3 + BAYER_B]
+
+#if defined(BAYER_BGGR) || defined(BAYER_RGGB)
+#define BAYER_TO_RGB24_COPY \
+ R(0, 0) = \
+ R(0, 1) = \
+ R(1, 1) = \
+ R(1, 0) = S(1, 1) >> BAYER_SHIFT; \
+ \
+ G(0, 1) = S(0, 1) >> BAYER_SHIFT; \
+ G(0, 0) = \
+ G(1, 1) = (T(0, 1) + T(1, 0)) >> (1 + BAYER_SHIFT); \
+ G(1, 0) = S(1, 0) >> BAYER_SHIFT; \
+ \
+ B(1, 1) = \
+ B(0, 0) = \
+ B(0, 1) = \
+ B(1, 0) = S(0, 0) >> BAYER_SHIFT;
+#define BAYER_TO_RGB24_INTERPOLATE \
+ R(0, 0) = (T(-1, -1) + T(-1, 1) + T(1, -1) + T(1, 1)) >> (2 + BAYER_SHIFT); \
+ G(0, 0) = (T(-1, 0) + T( 0, -1) + T(0, 1) + T(1, 0)) >> (2 + BAYER_SHIFT); \
+ B(0, 0) = S(0, 0) >> BAYER_SHIFT; \
+ \
+ R(0, 1) = (T(-1, 1) + T(1, 1)) >> (1 + BAYER_SHIFT); \
+ G(0, 1) = S(0, 1) >> BAYER_SHIFT; \
+ B(0, 1) = (T(0, 0) + T(0, 2)) >> (1 + BAYER_SHIFT); \
+ \
+ R(1, 0) = (T(1, -1) + T(1, 1)) >> (1 + BAYER_SHIFT); \
+ G(1, 0) = S(1, 0) >> BAYER_SHIFT; \
+ B(1, 0) = (T(0, 0) + T(2, 0)) >> (1 + BAYER_SHIFT); \
+ \
+ R(1, 1) = S(1, 1) >> BAYER_SHIFT; \
+ G(1, 1) = (T(0, 1) + T(1, 0) + T(1, 2) + T(2, 1)) >> (2 + BAYER_SHIFT); \
+ B(1, 1) = (T(0, 0) + T(0, 2) + T(2, 0) + T(2, 2)) >> (2 + BAYER_SHIFT);
+#else
+#define BAYER_TO_RGB24_COPY \
+ R(0, 0) = \
+ R(0, 1) = \
+ R(1, 1) = \
+ R(1, 0) = S(1, 0) >> BAYER_SHIFT; \
+ \
+ G(0, 0) = S(0, 0) >> BAYER_SHIFT; \
+ G(1, 1) = S(1, 1) >> BAYER_SHIFT; \
+ G(0, 1) = \
+ G(1, 0) = (T(0, 0) + T(1, 1)) >> (1 + BAYER_SHIFT); \
+ \
+ B(1, 1) = \
+ B(0, 0) = \
+ B(0, 1) = \
+ B(1, 0) = S(0, 1) >> BAYER_SHIFT;
+#define BAYER_TO_RGB24_INTERPOLATE \
+ R(0, 0) = (T(-1, 0) + T(1, 0)) >> (1 + BAYER_SHIFT); \
+ G(0, 0) = S(0, 0) >> BAYER_SHIFT; \
+ B(0, 0) = (T(0, -1) + T(0, 1)) >> (1 + BAYER_SHIFT); \
+ \
+ R(0, 1) = (T(-1, 0) + T(-1, 2) + T(1, 0) + T(1, 2)) >> (2 + BAYER_SHIFT); \
+ G(0, 1) = (T(-1, 1) + T(0, 0) + T(0, 2) + T(1, 1)) >> (2 + BAYER_SHIFT); \
+ B(0, 1) = S(0, 1) >> BAYER_SHIFT; \
+ \
+ R(1, 0) = S(1, 0) >> BAYER_SHIFT; \
+ G(1, 0) = (T(0, 0) + T(1, -1) + T(1, 1) + T(2, 0)) >> (2 + BAYER_SHIFT); \
+ B(1, 0) = (T(0, -1) + T(0, 1) + T(2, -1) + T(2, 1)) >> (2 + BAYER_SHIFT); \
+ \
+ R(1, 1) = (T(1, 0) + T(1, 2)) >> (1 + BAYER_SHIFT); \
+ G(1, 1) = S(1, 1) >> BAYER_SHIFT; \
+ B(1, 1) = (T(0, 1) + T(2, 1)) >> (1 + BAYER_SHIFT);
+#endif
+
+/**
+ * invoke ff_rgb24toyv12 for 2x2 pixels
+ */
+#define rgb24toyv12_2x2(src, dstY, dstU, dstV, luma_stride, src_stride, rgb2yuv) \
+ ff_rgb24toyv12(src, dstY, dstV, dstU, 2, 2, luma_stride, 0, src_stride, rgb2yuv)
+
+static void BAYER_RENAME(rgb24_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width)
+{
+ int i;
+ for (i = 0 ; i < width; i+= 2) {
+ BAYER_TO_RGB24_COPY
+ src += 2 * BAYER_SIZEOF;
+ dst += 6;
+ }
+}
+
+static void BAYER_RENAME(rgb24_interpolate)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width)
+{
+ int i;
+
+ BAYER_TO_RGB24_COPY
+ src += 2 * BAYER_SIZEOF;
+ dst += 6;
+
+ for (i = 2 ; i < width - 2; i+= 2) {
+ BAYER_TO_RGB24_INTERPOLATE
+ src += 2 * BAYER_SIZEOF;
+ dst += 6;
+ }
+
+ if (width > 2) {
+ BAYER_TO_RGB24_COPY
+ }
+}
+
+static void BAYER_RENAME(yv12_copy)(const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv)
+{
+ uint8_t dst[12];
+ const int dst_stride = 6;
+ int i;
+ for (i = 0 ; i < width; i+= 2) {
+ BAYER_TO_RGB24_COPY
+ rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv);
+ src += 2 * BAYER_SIZEOF;
+ dstY += 2;
+ dstU++;
+ dstV++;
+ }
+}
+
+static void BAYER_RENAME(yv12_interpolate)(const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv)
+{
+ uint8_t dst[12];
+ const int dst_stride = 6;
+ int i;
+
+ BAYER_TO_RGB24_COPY
+ rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv);
+ src += 2 * BAYER_SIZEOF;
+ dstY += 2;
+ dstU++;
+ dstV++;
+
+ for (i = 2 ; i < width - 2; i+= 2) {
+ BAYER_TO_RGB24_INTERPOLATE
+ rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv);
+ src += 2 * BAYER_SIZEOF;
+ dstY += 2;
+ dstU++;
+ dstV++;
+ }
+
+ if (width > 2) {
+ BAYER_TO_RGB24_COPY
+ rgb24toyv12_2x2(dst, dstY, dstU, dstV, luma_stride, dst_stride, rgb2yuv);
+ }
+}
+
+#undef S
+#undef T
+#undef R
+#undef G
+#undef B
+#undef BAYER_TO_RGB24_COPY
+#undef BAYER_TO_RGB24_INTERPOLATE
+
+#undef BAYER_RENAME
+
+#undef BAYER_R
+#undef BAYER_G
+#undef BAYER_B
+#undef BAYER_READ
+#undef BAYER_SIZEOF
+#undef BAYER_SHIFT
+
+#if defined(BAYER_BGGR)
+#undef BAYER_BGGR
+#endif
+#if defined(BAYER_RGGB)
+#undef BAYER_RGGB
+#endif
+#if defined(BAYER_GBRG)
+#undef BAYER_GBRG
+#endif
+#if defined(BAYER_GRBG)
+#undef BAYER_GRBG
+#endif
+#if defined(BAYER_8)
+#undef BAYER_8
+#endif
+#if defined(BAYER_16LE)
+#undef BAYER_16LE
+#endif
+#if defined(BAYER_16BE)
+#undef BAYER_16BE
+#endif
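
As the trailing #undef block suggests, this template is compiled once per Bayer layout and bit depth by defining the BAYER_* selectors before inclusion. A hypothetical instantiation (the real defines live in the including file, which is outside this diff):

/* Illustrative only -- emits bayer_bggr8_to_rgb24_copy(),
 * bayer_bggr8_to_rgb24_interpolate(), bayer_bggr8_to_yv12_copy()
 * and bayer_bggr8_to_yv12_interpolate(). */
#define BAYER_BGGR
#define BAYER_8
#define BAYER_RENAME(x) bayer_bggr8_to_ ## x
#include "bayer_template.c"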
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index fbf595d..42a915b 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -1,20 +1,20 @@
/*
* Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -33,7 +33,7 @@
#define FUNC(s, d, n) { s, d, #n, n }
-int main(void)
+int main(int argc, char **argv)
{
int i, funcNum;
uint8_t *srcBuffer = av_malloc(SIZE);
@@ -54,6 +54,7 @@ int main(void)
const char *name;
void (*func)(const uint8_t *src, uint8_t *dst, int src_size);
} func_info[] = {
+ FUNC(2, 2, rgb12to15),
FUNC(2, 2, rgb15to16),
FUNC(2, 3, rgb15to24),
FUNC(2, 4, rgb15to32),
@@ -66,6 +67,7 @@ int main(void)
FUNC(4, 2, rgb32to16),
FUNC(4, 3, rgb32to24),
FUNC(2, 2, rgb16to15),
+ FUNC(2, 2, rgb12tobgr12),
FUNC(2, 2, rgb15tobgr15),
FUNC(2, 2, rgb15tobgr16),
FUNC(2, 3, rgb15tobgr24),
@@ -82,6 +84,12 @@ int main(void)
FUNC(4, 2, rgb32tobgr16),
FUNC(4, 3, rgb32tobgr24),
FUNC(4, 4, shuffle_bytes_2103), /* rgb32tobgr32 */
+ FUNC(6, 6, rgb48tobgr48_nobswap),
+ FUNC(6, 6, rgb48tobgr48_bswap),
+ FUNC(8, 6, rgb64to48_nobswap),
+ FUNC(8, 6, rgb64to48_bswap),
+ FUNC(8, 6, rgb64tobgr48_nobswap),
+ FUNC(8, 6, rgb64tobgr48_bswap),
FUNC(0, 0, NULL)
};
int width;
diff --git a/libswscale/hscale_fast_bilinear.c b/libswscale/hscale_fast_bilinear.c
new file mode 100644
index 0000000..82d6177
--- /dev/null
+++ b/libswscale/hscale_fast_bilinear.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_internal.h"
+
+void ff_hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
+ const uint8_t *src, int srcW, int xInc)
+{
+ int i;
+ unsigned int xpos = 0;
+ for (i = 0; i < dstWidth; i++) {
+ register unsigned int xx = xpos >> 16;
+ register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
+ dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
+ xpos += xInc;
+ }
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+ dst[i] = src[srcW-1]*128;
+}
+
+void ff_hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
+ int dstWidth, const uint8_t *src1,
+ const uint8_t *src2, int srcW, int xInc)
+{
+ int i;
+ unsigned int xpos = 0;
+ for (i = 0; i < dstWidth; i++) {
+ register unsigned int xx = xpos >> 16;
+ register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
+ dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
+ dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
+ xpos += xInc;
+ }
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+ dst1[i] = src1[srcW-1]*128;
+ dst2[i] = src2[srcW-1]*128;
+ }
+}
diff --git a/libswscale/input.c b/libswscale/input.c
index f583b3f..6716f0d 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1,24 +1,23 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
@@ -30,38 +29,117 @@
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/pixdesc.h"
+#include "libavutil/avassert.h"
#include "config.h"
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
-#define RGB2YUV_SHIFT 15
-#define BY ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define BV (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define BU ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GY ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GV (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GU (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RY ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RV ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RU (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
-#define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE) ? b_r : r_b)
-#define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE) ? r_b : b_r)
+#define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? b_r : r_b)
+#define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? r_b : b_r)
+
+static av_always_inline void
+rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
+ enum AVPixelFormat origin, int32_t *rgb2yuv)
+{
+ int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+ int i;
+ for (i = 0; i < width; i++) {
+ unsigned int r_b = input_pixel(&src[i*4+0]);
+ unsigned int g = input_pixel(&src[i*4+1]);
+ unsigned int b_r = input_pixel(&src[i*4+2]);
+
+ dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+static av_always_inline void
+rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
+ const uint16_t *src1, const uint16_t *src2,
+ int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+{
+ int i;
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+ av_assert1(src1==src2);
+ for (i = 0; i < width; i++) {
+ int r_b = input_pixel(&src1[i*4+0]);
+ int g = input_pixel(&src1[i*4+1]);
+ int b_r = input_pixel(&src1[i*4+2]);
+
+ dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+static av_always_inline void
+rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
+ const uint16_t *src1, const uint16_t *src2,
+ int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+{
+ int i;
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+ av_assert1(src1==src2);
+ for (i = 0; i < width; i++) {
+ int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1;
+ int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1;
+ int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1;
+
+ dstU[i]= (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dstV[i]= (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+#define rgb64funcs(pattern, BE_LE, origin) \
+static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
+ int width, uint32_t *rgb2yuv) \
+{ \
+ const uint16_t *src = (const uint16_t *) _src; \
+ uint16_t *dst = (uint16_t *) _dst; \
+ rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \
+} \
+ \
+static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
+ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
+ int width, uint32_t *rgb2yuv) \
+{ \
+ const uint16_t *src1 = (const uint16_t *) _src1, \
+ *src2 = (const uint16_t *) _src2; \
+ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
+ rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+} \
+ \
+static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
+ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
+ int width, uint32_t *rgb2yuv) \
+{ \
+ const uint16_t *src1 = (const uint16_t *) _src1, \
+ *src2 = (const uint16_t *) _src2; \
+ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
+ rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+}
+
+rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE)
+rgb64funcs(rgb, BE, AV_PIX_FMT_RGBA64BE)
+rgb64funcs(bgr, LE, AV_PIX_FMT_BGRA64LE)
+rgb64funcs(bgr, BE, AV_PIX_FMT_BGRA64BE)
static av_always_inline void rgb48ToY_c_template(uint16_t *dst,
const uint16_t *src, int width,
- enum AVPixelFormat origin)
+ enum AVPixelFormat origin,
+ int32_t *rgb2yuv)
{
+ int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
int i;
for (i = 0; i < width; i++) {
unsigned int r_b = input_pixel(&src[i * 3 + 0]);
unsigned int g = input_pixel(&src[i * 3 + 1]);
unsigned int b_r = input_pixel(&src[i * 3 + 2]);
- dst[i] = (RY * r + GY * g + BY * b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dst[i] = (ry*r + gy*g + by*b + (0x2001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
}
}
@@ -70,17 +148,20 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU,
const uint16_t *src1,
const uint16_t *src2,
int width,
- enum AVPixelFormat origin)
+ enum AVPixelFormat origin,
+ int32_t *rgb2yuv)
{
int i;
- assert(src1 == src2);
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int r_b = input_pixel(&src1[i * 3 + 0]);
int g = input_pixel(&src1[i * 3 + 1]);
int b_r = input_pixel(&src1[i * 3 + 2]);
- dstU[i] = (RU * r + GU * g + BU * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV * r + GV * g + BV * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
}
}
@@ -89,10 +170,13 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
const uint16_t *src1,
const uint16_t *src2,
int width,
- enum AVPixelFormat origin)
+ enum AVPixelFormat origin,
+ int32_t *rgb2yuv)
{
int i;
- assert(src1 == src2);
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int r_b = (input_pixel(&src1[6 * i + 0]) +
input_pixel(&src1[6 * i + 3]) + 1) >> 1;
@@ -101,8 +185,8 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
int b_r = (input_pixel(&src1[6 * i + 2]) +
input_pixel(&src1[6 * i + 5]) + 1) >> 1;
- dstU[i] = (RU * r + GU * g + BU * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV * r + GV * g + BV * b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (ru*r + gu*g + bu*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstV[i] = (rv*r + gv*g + bv*b + (0x10001 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
}
}
@@ -113,40 +197,43 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \
const uint8_t *_src, \
+ const uint8_t *unused0, const uint8_t *unused1,\
int width, \
- uint32_t *unused) \
+ uint32_t *rgb2yuv) \
{ \
const uint16_t *src = (const uint16_t *)_src; \
uint16_t *dst = (uint16_t *)_dst; \
- rgb48ToY_c_template(dst, src, width, origin); \
+ rgb48ToY_c_template(dst, src, width, origin, rgb2yuv); \
} \
\
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \
uint8_t *_dstV, \
+ const uint8_t *unused0, \
const uint8_t *_src1, \
const uint8_t *_src2, \
int width, \
- uint32_t *unused) \
+ uint32_t *rgb2yuv) \
{ \
const uint16_t *src1 = (const uint16_t *)_src1, \
*src2 = (const uint16_t *)_src2; \
uint16_t *dstU = (uint16_t *)_dstU, \
*dstV = (uint16_t *)_dstV; \
- rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
+ rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
} \
\
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \
uint8_t *_dstV, \
+ const uint8_t *unused0, \
const uint8_t *_src1, \
const uint8_t *_src2, \
int width, \
- uint32_t *unused) \
+ uint32_t *rgb2yuv) \
{ \
const uint16_t *src1 = (const uint16_t *)_src1, \
*src2 = (const uint16_t *)_src2; \
uint16_t *dstU = (uint16_t *)_dstU, \
*dstV = (uint16_t *)_dstV; \
- rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
+ rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
}
rgb48funcs(rgb, LE, AV_PIX_FMT_RGB48LE)
@@ -162,7 +249,7 @@ rgb48funcs(bgr, BE, AV_PIX_FMT_BGR48BE)
: (isBE(origin) ? AV_RB16(&src[(i) * 2]) \
: AV_RL16(&src[(i) * 2])))
-static av_always_inline void rgb16_32ToY_c_template(uint8_t *dst,
+static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
const uint8_t *src,
int width,
enum AVPixelFormat origin,
@@ -170,10 +257,11 @@ static av_always_inline void rgb16_32ToY_c_template(uint8_t *dst,
int shb, int shp,
int maskr, int maskg,
int maskb, int rsh,
- int gsh, int bsh, int S)
+ int gsh, int bsh, int S,
+ int32_t *rgb2yuv)
{
- const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
- const unsigned rnd = 33u << (S - 1);
+ const int ry = rgb2yuv[RY_IDX]<<rsh, gy = rgb2yuv[GY_IDX]<<gsh, by = rgb2yuv[BY_IDX]<<bsh;
+ const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
int i;
for (i = 0; i < width; i++) {
@@ -182,12 +270,12 @@ static av_always_inline void rgb16_32ToY_c_template(uint8_t *dst,
int g = (px & maskg) >> shg;
int r = (px & maskr) >> shr;
- dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
+ dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
}
}
-static av_always_inline void rgb16_32ToUV_c_template(uint8_t *dstU,
- uint8_t *dstV,
+static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU,
+ int16_t *dstV,
const uint8_t *src,
int width,
enum AVPixelFormat origin,
@@ -195,11 +283,12 @@ static av_always_inline void rgb16_32ToUV_c_template(uint8_t *dstU,
int shb, int shp,
int maskr, int maskg,
int maskb, int rsh,
- int gsh, int bsh, int S)
+ int gsh, int bsh, int S,
+ int32_t *rgb2yuv)
{
- const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
- rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
- const unsigned rnd = 257u << (S - 1);
+ const int ru = rgb2yuv[RU_IDX] << rsh, gu = rgb2yuv[GU_IDX] << gsh, bu = rgb2yuv[BU_IDX] << bsh,
+ rv = rgb2yuv[RV_IDX] << rsh, gv = rgb2yuv[GV_IDX] << gsh, bv = rgb2yuv[BV_IDX] << bsh;
+ const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
int i;
for (i = 0; i < width; i++) {
@@ -208,13 +297,13 @@ static av_always_inline void rgb16_32ToUV_c_template(uint8_t *dstU,
int g = (px & maskg) >> shg;
int r = (px & maskr) >> shr;
- dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
- dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
+ dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
+ dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
}
}
-static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU,
- uint8_t *dstV,
+static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
+ int16_t *dstV,
const uint8_t *src,
int width,
enum AVPixelFormat origin,
@@ -222,20 +311,21 @@ static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU,
int shb, int shp,
int maskr, int maskg,
int maskb, int rsh,
- int gsh, int bsh, int S)
+ int gsh, int bsh, int S,
+ int32_t *rgb2yuv)
{
- const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
- rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
+ const int ru = rgb2yuv[RU_IDX] << rsh, gu = rgb2yuv[GU_IDX] << gsh, bu = rgb2yuv[BU_IDX] << bsh,
+ rv = rgb2yuv[RV_IDX] << rsh, gv = rgb2yuv[GV_IDX] << gsh, bv = rgb2yuv[BV_IDX] << bsh,
maskgx = ~(maskr | maskb);
- const unsigned rnd = 257u << S;
+ const unsigned rnd = (256U<<(S)) + (1<<(S-6));
int i;
maskr |= maskr << 1;
maskb |= maskb << 1;
maskg |= maskg << 1;
for (i = 0; i < width; i++) {
- int px0 = input_pixel(2 * i + 0) >> shp;
- int px1 = input_pixel(2 * i + 1) >> shp;
+ unsigned px0 = input_pixel(2 * i + 0) >> shp;
+ unsigned px1 = input_pixel(2 * i + 1) >> shp;
int b, r, g = (px0 & maskgx) + (px1 & maskgx);
int rb = px0 + px1 - g;
@@ -249,8 +339,8 @@ static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU,
}
r = (rb & maskr) >> shr;
- dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
- dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
+ dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
+ dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
}
}
@@ -258,31 +348,31 @@ static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU,
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
maskg, maskb, rsh, gsh, bsh, S) \
-static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
- int width, uint32_t *unused) \
+static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
+ int width, uint32_t *tab) \
{ \
- rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
- maskr, maskg, maskb, rsh, gsh, bsh, S); \
+ rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \
+ maskr, maskg, maskb, rsh, gsh, bsh, S, tab); \
} \
\
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
- const uint8_t *src, const uint8_t *dummy, \
- int width, uint32_t *unused) \
+ const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
+ int width, uint32_t *tab) \
{ \
- rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, \
+ rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
shr, shg, shb, shp, \
- maskr, maskg, maskb, rsh, gsh, bsh, S); \
+ maskr, maskg, maskb, rsh, gsh, bsh, S, tab);\
} \
\
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
- const uint8_t *src, \
+ const uint8_t *unused0, const uint8_t *src, \
const uint8_t *dummy, \
- int width, uint32_t *unused) \
+ int width, uint32_t *tab) \
{ \
- rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, \
+ rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
shr, shg, shb, shp, \
maskr, maskg, maskb, \
- rsh, gsh, bsh, S); \
+ rsh, gsh, bsh, S, tab); \
}
rgb16_32_wrapper(AV_PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT + 8)
@@ -302,71 +392,127 @@ rgb16_32_wrapper(AV_PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0,
rgb16_32_wrapper(AV_PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
rgb16_32_wrapper(AV_PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
-static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width,
- uint32_t *unused)
+static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV,
+ const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
+ int width, uint32_t *rgb2yuv)
{
+ uint16_t *dstU = (uint16_t *)_dstU;
+ uint16_t *dstV = (uint16_t *)_dstV;
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+
int i;
- for (i = 0; i < width; i++)
- dst[i] = src[4 * i];
+ for (i = 0; i < width; i++) {
+ unsigned int g = gsrc[2*i] + gsrc[2*i+1];
+ unsigned int b = bsrc[2*i] + bsrc[2*i+1];
+ unsigned int r = rsrc[2*i] + rsrc[2*i+1];
+
+ dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
+ dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
+ }
}
-static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width,
- uint32_t *unused)
+static void rgba64ToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused)
{
+ int16_t *dst = (int16_t *)_dst;
+ const uint16_t *src = (const uint16_t *)_src;
int i;
for (i = 0; i < width; i++)
dst[i] = src[4 * i + 3];
}
-static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
+static void abgrToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
+{
+ int16_t *dst = (int16_t *)_dst;
+ int i;
+ for (i=0; i<width; i++) {
+ dst[i]= src[4*i]<<6;
+ }
+}
+
+static void rgbaToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
+{
+ int16_t *dst = (int16_t *)_dst;
+ int i;
+ for (i=0; i<width; i++) {
+ dst[i]= src[4*i+3]<<6;
+ }
+}
+
+static void palToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
+ int16_t *dst = (int16_t *)_dst;
+ int i;
+ for (i=0; i<width; i++) {
+ int d= src[i];
+
+ dst[i]= (pal[d] >> 24)<<6;
+ }
+}
+
+static void palToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
+{
+ int16_t *dst = (int16_t *)_dst;
int i;
for (i = 0; i < width; i++) {
int d = src[i];
- dst[i] = pal[d] & 0xFF;
+ dst[i] = (pal[d] & 0xFF)<<6;
}
}
-static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
+static void palToUV_c(uint8_t *_dstU, uint8_t *_dstV,
+ const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *pal)
{
+ uint16_t *dstU = (uint16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
int i;
- assert(src1 == src2);
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int p = pal[src1[i]];
- dstU[i] = p >> 8;
- dstV[i] = p >> 16;
+ dstU[i] = (uint8_t)(p>> 8)<<6;
+ dstV[i] = (uint8_t)(p>>16)<<6;
}
}
-static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
- int width, uint32_t *unused)
+static void monowhite2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
+ int16_t *dst = (int16_t *)_dst;
int i, j;
width = (width + 7) >> 3;
for (i = 0; i < width; i++) {
int d = ~src[i];
for (j = 0; j < 8; j++)
- dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255;
+ dst[8*i+j]= ((d>>(7-j))&1) * 16383;
+ }
+ if(width&7){
+ int d= ~src[i];
+ for (j = 0; j < (width&7); j++)
+ dst[8*i+j]= ((d>>(7-j))&1) * 16383;
}
}
-static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
- int width, uint32_t *unused)
+static void monoblack2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
+ int16_t *dst = (int16_t *)_dst;
int i, j;
width = (width + 7) >> 3;
for (i = 0; i < width; i++) {
int d = src[i];
for (j = 0; j < 8; j++)
- dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255;
+ dst[8*i+j]= ((d>>(7-j))&1) * 16383;
+ }
+ if(width&7){
+ int d = src[i];
+ for (j = 0; j < (width&7); j++)
+ dst[8*i+j] = ((d>>(7-j))&1) * 16383;
}
}
-static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
+static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -374,7 +520,7 @@ static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
dst[i] = src[2 * i];
}
-static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
{
int i;
@@ -382,10 +528,10 @@ static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
dstU[i] = src1[4 * i + 1];
dstV[i] = src1[4 * i + 3];
}
- assert(src1 == src2);
+ av_assert1(src1 == src2);
}
-static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
{
int i;
@@ -393,10 +539,10 @@ static void yvy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
dstV[i] = src1[4 * i + 1];
dstU[i] = src1[4 * i + 3];
}
- assert(src1 == src2);
+ av_assert1(src1 == src2);
}
-static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width,
+static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -406,7 +552,7 @@ static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width,
dst[i] = av_bswap16(src[i]);
}
-static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
+static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
const uint8_t *_src2, int width, uint32_t *unused)
{
int i;
@@ -419,7 +565,7 @@ static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
}
}
-static void read_ya16le_gray_c(uint8_t *dst, const uint8_t *src, int width,
+static void read_ya16le_gray_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -427,7 +573,7 @@ static void read_ya16le_gray_c(uint8_t *dst, const uint8_t *src, int width,
AV_WN16(dst + i * 2, AV_RL16(src + i * 4));
}
-static void read_ya16le_alpha_c(uint8_t *dst, const uint8_t *src, int width,
+static void read_ya16le_alpha_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -435,7 +581,7 @@ static void read_ya16le_alpha_c(uint8_t *dst, const uint8_t *src, int width,
AV_WN16(dst + i * 2, AV_RL16(src + i * 4 + 2));
}
-static void read_ya16be_gray_c(uint8_t *dst, const uint8_t *src, int width,
+static void read_ya16be_gray_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -443,7 +589,7 @@ static void read_ya16be_gray_c(uint8_t *dst, const uint8_t *src, int width,
AV_WN16(dst + i * 2, AV_RB16(src + i * 4));
}
-static void read_ya16be_alpha_c(uint8_t *dst, const uint8_t *src, int width,
+static void read_ya16be_alpha_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -453,7 +599,7 @@ static void read_ya16be_alpha_c(uint8_t *dst, const uint8_t *src, int width,
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
-static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
+static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -461,7 +607,7 @@ static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
dst[i] = src[2 * i + 1];
}
-static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
{
int i;
@@ -469,7 +615,7 @@ static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
dstU[i] = src1[4 * i + 0];
dstV[i] = src1[4 * i + 2];
}
- assert(src1 == src2);
+ av_assert1(src1 == src2);
}
static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
@@ -483,14 +629,14 @@ static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
}
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
+ const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *unused)
{
nvXXtoUV_c(dstU, dstV, src1, width);
}
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
+ const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *unused)
{
nvXXtoUV_c(dstV, dstU, src1, width);
@@ -498,218 +644,213 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
-static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
- int width, uint32_t *unused)
+static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
+ int width, uint32_t *rgb2yuv)
{
+ int16_t *dst = (int16_t *)_dst;
+ int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
int i;
for (i = 0; i < width; i++) {
int b = src[i * 3 + 0];
int g = src[i * 3 + 1];
int r = src[i * 3 + 2];
- dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
+ dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
}
}
-static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
- const uint8_t *src2, int width, uint32_t *unused)
+static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *rgb2yuv)
{
+ int16_t *dstU = (int16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
int i;
for (i = 0; i < width; i++) {
int b = src1[3 * i + 0];
int g = src1[3 * i + 1];
int r = src1[3 * i + 2];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+ dstV[i] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
}
- assert(src1 == src2);
+ av_assert1(src1 == src2);
}
-static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
- const uint8_t *src2, int width, uint32_t *unused)
+static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *rgb2yuv)
{
+ int16_t *dstU = (int16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
int i;
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
for (i = 0; i < width; i++) {
int b = src1[6 * i + 0] + src1[6 * i + 3];
int g = src1[6 * i + 1] + src1[6 * i + 4];
int r = src1[6 * i + 2] + src1[6 * i + 5];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
- dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
+ dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+ dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
}
- assert(src1 == src2);
+ av_assert1(src1 == src2);
}
-static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
- uint32_t *unused)
+static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
+ uint32_t *rgb2yuv)
{
+ int16_t *dst = (int16_t *)_dst;
+ int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
int i;
for (i = 0; i < width; i++) {
int r = src[i * 3 + 0];
int g = src[i * 3 + 1];
int b = src[i * 3 + 2];
- dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
+ dst[i] = ((ry*r + gy*g + by*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
}
}
-static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
- const uint8_t *src2, int width, uint32_t *unused)
+static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *rgb2yuv)
{
+ int16_t *dstU = (int16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
int i;
- assert(src1 == src2);
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int r = src1[3 * i + 0];
int g = src1[3 * i + 1];
int b = src1[3 * i + 2];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (ru*r + gu*g + bu*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+ dstV[i] = (rv*r + gv*g + bv*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
}
}
-static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
- const uint8_t *src2, int width, uint32_t *unused)
+static void rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *rgb2yuv)
{
+ int16_t *dstU = (int16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
int i;
- assert(src1 == src2);
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int r = src1[6 * i + 0] + src1[6 * i + 3];
int g = src1[6 * i + 1] + src1[6 * i + 4];
int b = src1[6 * i + 2] + src1[6 * i + 5];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
- dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
+ dstU[i] = (ru*r + gu*g + bu*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+ dstV[i] = (rv*r + gv*g + bv*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
}
}
-static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
+static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *rgb2yuv)
{
+ uint16_t *dst = (uint16_t *)_dst;
+ int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
int i;
for (i = 0; i < width; i++) {
int g = src[0][i];
int b = src[1][i];
int r = src[2][i];
- dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
+ dst[i] = (ry*r + gy*g + by*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
-static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
+static void planar_rgb_to_a(uint8_t *_dst, const uint8_t *src[4], int width, int32_t *unused)
{
+ uint16_t *dst = (uint16_t *)_dst;
+ int i;
+ for (i = 0; i < width; i++)
+ dst[i] = src[3][i] << 6;
+}
+
+static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width, int32_t *rgb2yuv)
+{
+ uint16_t *dstU = (uint16_t *)_dstU;
+ uint16_t *dstV = (uint16_t *)_dstV;
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
int i;
for (i = 0; i < width; i++) {
int g = src[0][i];
int b = src[1][i];
int r = src[2][i];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (ru*r + gu*g + bu*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
+ dstV[i] = (rv*r + gv*g + bv*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
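The magic constants above fold the chroma/luma offset and the rounding term into one literal. A quick sanity check, assuming RGB2YUV_SHIFT == 15:

#include <assert.h>

#define RGB2YUV_SHIFT 15

int main(void)
{
    /* 0x4001 << (SHIFT-7) is the +128 chroma offset in Q15 plus half of the
     * final >> (SHIFT-6) step; 0x801 << (SHIFT-7) likewise folds the +16
     * luma offset with the same rounding bit. */
    assert((0x4001 << (RGB2YUV_SHIFT - 7)) ==
           (128 << RGB2YUV_SHIFT) + (1 << (RGB2YUV_SHIFT - 7)));
    assert((0x801 << (RGB2YUV_SHIFT - 7)) ==
           (16 << RGB2YUV_SHIFT) + (1 << (RGB2YUV_SHIFT - 7)));
    return 0;
}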
#define rdpx(src) \
is_be ? AV_RB16(src) : AV_RL16(src)
static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_src[4],
- int width, int bpc, int is_be)
+ int width, int bpc, int is_be, int32_t *rgb2yuv)
{
int i;
const uint16_t **src = (const uint16_t **)_src;
uint16_t *dst = (uint16_t *)_dst;
+ int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+ int shift = bpc < 16 ? bpc : 14;
for (i = 0; i < width; i++) {
int g = rdpx(src[0] + i);
int b = rdpx(src[1] + i);
int r = rdpx(src[2] + i);
- dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> RGB2YUV_SHIFT);
+ dst[i] = ((ry*r + gy*g + by*b + (33 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14));
}
}
-static void planar_rgb9le_to_y(uint8_t *dst, const uint8_t *src[4], int w)
-{
- planar_rgb16_to_y(dst, src, w, 9, 0);
-}
-
-static void planar_rgb9be_to_y(uint8_t *dst, const uint8_t *src[4], int w)
-{
- planar_rgb16_to_y(dst, src, w, 9, 1);
-}
-
-static void planar_rgb10le_to_y(uint8_t *dst, const uint8_t *src[4], int w)
-{
- planar_rgb16_to_y(dst, src, w, 10, 0);
-}
-
-static void planar_rgb10be_to_y(uint8_t *dst, const uint8_t *src[4], int w)
-{
- planar_rgb16_to_y(dst, src, w, 10, 1);
-}
-
-static void planar_rgb16le_to_y(uint8_t *dst, const uint8_t *src[4], int w)
-{
- planar_rgb16_to_y(dst, src, w, 16, 0);
-}
-
-static void planar_rgb16be_to_y(uint8_t *dst, const uint8_t *src[4], int w)
-{
- planar_rgb16_to_y(dst, src, w, 16, 1);
-}
-
static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV,
const uint8_t *_src[4], int width,
- int bpc, int is_be)
+ int bpc, int is_be, int32_t *rgb2yuv)
{
int i;
const uint16_t **src = (const uint16_t **)_src;
uint16_t *dstU = (uint16_t *)_dstU;
uint16_t *dstV = (uint16_t *)_dstV;
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+ int shift = bpc < 16 ? bpc : 14;
for (i = 0; i < width; i++) {
int g = rdpx(src[0] + i);
int b = rdpx(src[1] + i);
int r = rdpx(src[2] + i);
- dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> RGB2YUV_SHIFT;
+ dstU[i] = (ru*r + gu*g + bu*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
+ dstV[i] = (rv*r + gv*g + bv*b + (257 << (RGB2YUV_SHIFT + bpc - 9))) >> (RGB2YUV_SHIFT + shift - 14);
}
}
#undef rdpx
-static void planar_rgb9le_to_uv(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src[4], int w)
-{
- planar_rgb16_to_uv(dstU, dstV, src, w, 9, 0);
-}
-
-static void planar_rgb9be_to_uv(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src[4], int w)
-{
- planar_rgb16_to_uv(dstU, dstV, src, w, 9, 1);
-}
-
-static void planar_rgb10le_to_uv(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src[4], int w)
-{
- planar_rgb16_to_uv(dstU, dstV, src, w, 10, 0);
-}
-
-static void planar_rgb10be_to_uv(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src[4], int w)
-{
- planar_rgb16_to_uv(dstU, dstV, src, w, 10, 1);
-}
-
-static void planar_rgb16le_to_uv(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src[4], int w)
-{
- planar_rgb16_to_uv(dstU, dstV, src, w, 16, 0);
-}
-
-static void planar_rgb16be_to_uv(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src[4], int w)
-{
- planar_rgb16_to_uv(dstU, dstV, src, w, 16, 1);
-}
+#define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \
+static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \
+ int w, int32_t *rgb2yuv) \
+{ \
+ planar_rgb16_to_y(dst, src, w, nbits, endian, rgb2yuv); \
+} \
+static void planar_rgb##nbits##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV, \
+ const uint8_t *src[4], int w, int32_t *rgb2yuv) \
+{ \
+ planar_rgb16_to_uv(dstU, dstV, src, w, nbits, endian, rgb2yuv); \
+} \
+
+#define rgb9plus_planar_funcs(nbits) \
+ rgb9plus_planar_funcs_endian(nbits, le, 0) \
+ rgb9plus_planar_funcs_endian(nbits, be, 1)
+
+rgb9plus_planar_funcs(9)
+rgb9plus_planar_funcs(10)
+rgb9plus_planar_funcs(12)
+rgb9plus_planar_funcs(14)
+rgb9plus_planar_funcs(16)
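For readability, here is what one instantiation of the macro pair above expands to (a sketch of the generated code, not an addition to the patch):

/* rgb9plus_planar_funcs(12) produces four thin wrappers equivalent to: */
static void planar_rgb12le_to_y(uint8_t *dst, const uint8_t *src[4],
                                int w, int32_t *rgb2yuv)
{
    planar_rgb16_to_y(dst, src, w, 12, 0, rgb2yuv);
}
static void planar_rgb12be_to_y(uint8_t *dst, const uint8_t *src[4],
                                int w, int32_t *rgb2yuv)
{
    planar_rgb16_to_y(dst, src, w, 12, 1, rgb2yuv);
}
/* ...plus planar_rgb12le_to_uv / planar_rgb12be_to_uv forwarding to
 * planar_rgb16_to_uv with the same (nbits, is_be) pairs. */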
av_cold void ff_sws_init_input_funcs(SwsContext *c)
{
@@ -745,6 +886,13 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_GBRP10LE:
c->readChrPlanar = planar_rgb10le_to_uv;
break;
+ case AV_PIX_FMT_GBRP12LE:
+ c->readChrPlanar = planar_rgb12le_to_uv;
+ break;
+ case AV_PIX_FMT_GBRP14LE:
+ c->readChrPlanar = planar_rgb14le_to_uv;
+ break;
+ case AV_PIX_FMT_GBRAP16LE:
case AV_PIX_FMT_GBRP16LE:
c->readChrPlanar = planar_rgb16le_to_uv;
break;
@@ -754,9 +902,17 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_GBRP10BE:
c->readChrPlanar = planar_rgb10be_to_uv;
break;
+ case AV_PIX_FMT_GBRP12BE:
+ c->readChrPlanar = planar_rgb12be_to_uv;
+ break;
+ case AV_PIX_FMT_GBRP14BE:
+ c->readChrPlanar = planar_rgb14be_to_uv;
+ break;
+ case AV_PIX_FMT_GBRAP16BE:
case AV_PIX_FMT_GBRP16BE:
c->readChrPlanar = planar_rgb16be_to_uv;
break;
+ case AV_PIX_FMT_GBRAP:
case AV_PIX_FMT_GBRP:
c->readChrPlanar = planar_rgb_to_uv;
break;
@@ -767,14 +923,21 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_YUV422P10LE:
case AV_PIX_FMT_YUV444P10LE:
case AV_PIX_FMT_YUV420P10LE:
+ case AV_PIX_FMT_YUV422P12LE:
+ case AV_PIX_FMT_YUV444P12LE:
+ case AV_PIX_FMT_YUV420P12LE:
+ case AV_PIX_FMT_YUV422P14LE:
+ case AV_PIX_FMT_YUV444P14LE:
+ case AV_PIX_FMT_YUV420P14LE:
case AV_PIX_FMT_YUV420P16LE:
case AV_PIX_FMT_YUV422P16LE:
case AV_PIX_FMT_YUV444P16LE:
+
case AV_PIX_FMT_YUVA444P9LE:
case AV_PIX_FMT_YUVA422P9LE:
case AV_PIX_FMT_YUVA420P9LE:
- case AV_PIX_FMT_YUVA422P10LE:
case AV_PIX_FMT_YUVA444P10LE:
+ case AV_PIX_FMT_YUVA422P10LE:
case AV_PIX_FMT_YUVA420P10LE:
case AV_PIX_FMT_YUVA420P16LE:
case AV_PIX_FMT_YUVA422P16LE:
@@ -788,14 +951,21 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_YUV444P10BE:
case AV_PIX_FMT_YUV422P10BE:
case AV_PIX_FMT_YUV420P10BE:
+ case AV_PIX_FMT_YUV444P12BE:
+ case AV_PIX_FMT_YUV422P12BE:
+ case AV_PIX_FMT_YUV420P12BE:
+ case AV_PIX_FMT_YUV444P14BE:
+ case AV_PIX_FMT_YUV422P14BE:
+ case AV_PIX_FMT_YUV420P14BE:
case AV_PIX_FMT_YUV420P16BE:
case AV_PIX_FMT_YUV422P16BE:
case AV_PIX_FMT_YUV444P16BE:
+
case AV_PIX_FMT_YUVA444P9BE:
case AV_PIX_FMT_YUVA422P9BE:
case AV_PIX_FMT_YUVA420P9BE:
- case AV_PIX_FMT_YUVA422P10BE:
case AV_PIX_FMT_YUVA444P10BE:
+ case AV_PIX_FMT_YUVA422P10BE:
case AV_PIX_FMT_YUVA420P10BE:
case AV_PIX_FMT_YUVA420P16BE:
case AV_PIX_FMT_YUVA422P16BE:
@@ -806,6 +976,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
}
if (c->chrSrcHSubSample) {
switch (srcFormat) {
+ case AV_PIX_FMT_RGBA64BE:
+ c->chrToYV12 = rgb64BEToUV_half_c;
+ break;
+ case AV_PIX_FMT_RGBA64LE:
+ c->chrToYV12 = rgb64LEToUV_half_c;
+ break;
+ case AV_PIX_FMT_BGRA64BE:
+ c->chrToYV12 = bgr64BEToUV_half_c;
+ break;
+ case AV_PIX_FMT_BGRA64LE:
+ c->chrToYV12 = bgr64LEToUV_half_c;
+ break;
case AV_PIX_FMT_RGB48BE:
c->chrToYV12 = rgb48BEToUV_half_c;
break;
@@ -839,6 +1021,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_BGR555BE:
c->chrToYV12 = bgr15beToUV_half_c;
break;
+ case AV_PIX_FMT_GBRAP:
+ case AV_PIX_FMT_GBRP:
+ c->chrToYV12 = gbr24pToUV_half_c;
+ break;
case AV_PIX_FMT_BGR444LE:
c->chrToYV12 = bgr12leToUV_half_c;
break;
@@ -875,6 +1061,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
}
} else {
switch (srcFormat) {
+ case AV_PIX_FMT_RGBA64BE:
+ c->chrToYV12 = rgb64BEToUV_c;
+ break;
+ case AV_PIX_FMT_RGBA64LE:
+ c->chrToYV12 = rgb64LEToUV_c;
+ break;
+ case AV_PIX_FMT_BGRA64BE:
+ c->chrToYV12 = bgr64BEToUV_c;
+ break;
+ case AV_PIX_FMT_BGRA64LE:
+ c->chrToYV12 = bgr64LEToUV_c;
+ break;
case AV_PIX_FMT_RGB48BE:
c->chrToYV12 = rgb48BEToUV_c;
break;
@@ -953,6 +1151,13 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_GBRP10LE:
c->readLumPlanar = planar_rgb10le_to_y;
break;
+ case AV_PIX_FMT_GBRP12LE:
+ c->readLumPlanar = planar_rgb12le_to_y;
+ break;
+ case AV_PIX_FMT_GBRP14LE:
+ c->readLumPlanar = planar_rgb14le_to_y;
+ break;
+ case AV_PIX_FMT_GBRAP16LE:
case AV_PIX_FMT_GBRP16LE:
c->readLumPlanar = planar_rgb16le_to_y;
break;
@@ -962,9 +1167,18 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_GBRP10BE:
c->readLumPlanar = planar_rgb10be_to_y;
break;
+ case AV_PIX_FMT_GBRP12BE:
+ c->readLumPlanar = planar_rgb12be_to_y;
+ break;
+ case AV_PIX_FMT_GBRP14BE:
+ c->readLumPlanar = planar_rgb14be_to_y;
+ break;
+ case AV_PIX_FMT_GBRAP16BE:
case AV_PIX_FMT_GBRP16BE:
c->readLumPlanar = planar_rgb16be_to_y;
break;
+ case AV_PIX_FMT_GBRAP:
+ c->readAlpPlanar = planar_rgb_to_a; /* fall through: GBRAP also uses the GBRP luma reader below */
case AV_PIX_FMT_GBRP:
c->readLumPlanar = planar_rgb_to_y;
break;
@@ -975,9 +1189,16 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_YUV444P10LE:
case AV_PIX_FMT_YUV422P10LE:
case AV_PIX_FMT_YUV420P10LE:
+ case AV_PIX_FMT_YUV444P12LE:
+ case AV_PIX_FMT_YUV422P12LE:
+ case AV_PIX_FMT_YUV420P12LE:
+ case AV_PIX_FMT_YUV444P14LE:
+ case AV_PIX_FMT_YUV422P14LE:
+ case AV_PIX_FMT_YUV420P14LE:
case AV_PIX_FMT_YUV420P16LE:
case AV_PIX_FMT_YUV422P16LE:
case AV_PIX_FMT_YUV444P16LE:
+
case AV_PIX_FMT_GRAY16LE:
c->lumToYV12 = bswap16Y_c;
break;
@@ -1000,9 +1221,16 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_YUV444P10BE:
case AV_PIX_FMT_YUV422P10BE:
case AV_PIX_FMT_YUV420P10BE:
+ case AV_PIX_FMT_YUV444P12BE:
+ case AV_PIX_FMT_YUV422P12BE:
+ case AV_PIX_FMT_YUV420P12BE:
+ case AV_PIX_FMT_YUV444P14BE:
+ case AV_PIX_FMT_YUV422P14BE:
+ case AV_PIX_FMT_YUV420P14BE:
case AV_PIX_FMT_YUV420P16BE:
case AV_PIX_FMT_YUV422P16BE:
case AV_PIX_FMT_YUV444P16BE:
+
case AV_PIX_FMT_GRAY16BE:
c->lumToYV12 = bswap16Y_c;
break;
@@ -1114,9 +1342,28 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_BGR48LE:
c->lumToYV12 = bgr48LEToY_c;
break;
+ case AV_PIX_FMT_RGBA64BE:
+ c->lumToYV12 = rgb64BEToY_c;
+ break;
+ case AV_PIX_FMT_RGBA64LE:
+ c->lumToYV12 = rgb64LEToY_c;
+ break;
+ case AV_PIX_FMT_BGRA64BE:
+ c->lumToYV12 = bgr64BEToY_c;
+ break;
+ case AV_PIX_FMT_BGRA64LE:
+ c->lumToYV12 = bgr64LEToY_c;
}
if (c->alpPixBuf) {
+ if (is16BPS(srcFormat) || isNBPS(srcFormat)) {
+ if (HAVE_BIGENDIAN == !isBE(srcFormat))
+ c->alpToYV12 = bswap16Y_c;
+ }
switch (srcFormat) {
+ case AV_PIX_FMT_BGRA64LE:
+ case AV_PIX_FMT_BGRA64BE:
+ case AV_PIX_FMT_RGBA64LE:
+ case AV_PIX_FMT_RGBA64BE: c->alpToYV12 = rgba64ToA_c; break;
case AV_PIX_FMT_BGRA:
case AV_PIX_FMT_RGBA:
c->alpToYV12 = rgbaToA_c;
@@ -1128,6 +1375,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case AV_PIX_FMT_YA8:
c->alpToYV12 = uyvyToY_c;
break;
+ case AV_PIX_FMT_PAL8 :
+ c->alpToYV12 = palToA_c;
+ break;
}
}
}
diff --git a/libswscale/options.c b/libswscale/options.c
index e7765d6..5433d55 100644
--- a/libswscale/options.c
+++ b/libswscale/options.c
@@ -1,20 +1,20 @@
/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -34,7 +34,7 @@ static const char *sws_context_to_name(void *ptr)
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
static const AVOption swscale_options[] = {
- { "sws_flags", "scaler flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, { .i64 = DEFAULT }, 0, UINT_MAX, VE, "sws_flags" },
+ { "sws_flags", "scaler flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, { .i64 = SWS_BICUBIC }, 0, UINT_MAX, VE, "sws_flags" },
{ "fast_bilinear", "fast bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FAST_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "bilinear", "bilinear", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BILINEAR }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "bicubic", "bicubic", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BICUBIC }, INT_MIN, INT_MAX, VE, "sws_flags" },
@@ -51,6 +51,7 @@ static const AVOption swscale_options[] = {
{ "full_chroma_int", "full chroma interpolation", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "full_chroma_inp", "full chroma input", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "bitexact", "", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_BITEXACT }, INT_MIN, INT_MAX, VE, "sws_flags" },
+ { "error_diffusion", "error diffusion dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_ERROR_DIFFUSION}, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "srcw", "source width", OFFSET(srcW), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE },
{ "srch", "source height", OFFSET(srcH), AV_OPT_TYPE_INT, { .i64 = 16 }, 1, INT_MAX, VE },
@@ -63,10 +64,28 @@ static const AVOption swscale_options[] = {
{ "param0", "scaler param 0", OFFSET(param[0]), AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT }, INT_MIN, INT_MAX, VE },
{ "param1", "scaler param 1", OFFSET(param[1]), AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT }, INT_MIN, INT_MAX, VE },
+ { "src_v_chr_pos", "source vertical chroma position in luma grid/256" , OFFSET(src_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE },
+ { "src_h_chr_pos", "source horizontal chroma position in luma grid/256", OFFSET(src_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE },
+ { "dst_v_chr_pos", "destination vertical chroma position in luma grid/256" , OFFSET(dst_v_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE },
+ { "dst_h_chr_pos", "destination horizontal chroma position in luma grid/256", OFFSET(dst_h_chr_pos), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 512, VE },
+
+ { "sws_dither", "set dithering algorithm", OFFSET(dither), AV_OPT_TYPE_INT, { .i64 = SWS_DITHER_AUTO }, 0, NB_SWS_DITHER, VE, "sws_dither" },
+ { "auto", "leave choice to sws", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_AUTO }, INT_MIN, INT_MAX, VE, "sws_dither" },
+ { "bayer", "bayer dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_BAYER }, INT_MIN, INT_MAX, VE, "sws_dither" },
+ { "ed", "error diffusion", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_ED }, INT_MIN, INT_MAX, VE, "sws_dither" },
+ { "a_dither", "arithmetic addition dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_A_DITHER}, INT_MIN, INT_MAX, VE, "sws_dither" },
+ { "x_dither", "arithmetic xor dither", 0, AV_OPT_TYPE_CONST, { .i64 = SWS_DITHER_X_DITHER}, INT_MIN, INT_MAX, VE, "sws_dither" },
+
{ NULL }
};
-const AVClass sws_context_class = { "SWScaler", sws_context_to_name, swscale_options };
+const AVClass sws_context_class = {
+ .class_name = "SWScaler",
+ .item_name = sws_context_to_name,
+ .option = swscale_options,
+ .category = AV_CLASS_CATEGORY_SWSCALER,
+ .version = LIBAVUTIL_VERSION_INT,
+};
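With the context class filled in, the new options are reachable through the generic AVOptions API. A hedged usage sketch (the "sws_dither", "srcw", "srch" names are registered above; the "dstw"/"dsth"/"src_format"/"dst_format" names are assumed from the part of the option table not shown in this hunk, and error reporting is elided):

#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#include <libavutil/pixfmt.h>

struct SwsContext *make_dithered_scaler(void)
{
    struct SwsContext *c = sws_alloc_context();
    if (!c)
        return NULL;
    av_opt_set_int(c, "srcw",       1920,               0);
    av_opt_set_int(c, "srch",       1080,               0);
    av_opt_set_int(c, "src_format", AV_PIX_FMT_YUV420P, 0);
    av_opt_set_int(c, "dstw",       640,                0);
    av_opt_set_int(c, "dsth",       360,                0);
    av_opt_set_int(c, "dst_format", AV_PIX_FMT_RGB8,    0);
    av_opt_set    (c, "sws_dither", "ed",               0); /* new error-diffusion mode */
    if (sws_init_context(c, NULL, NULL) < 0) {
        sws_freeContext(c);
        return NULL;
    }
    return c;
}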
const AVClass *sws_get_class(void)
{
diff --git a/libswscale/output.c b/libswscale/output.c
index 125d998..eee6b48 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -1,24 +1,23 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
@@ -26,6 +25,7 @@
#include "libavutil/attributes.h"
#include "libavutil/avutil.h"
+#include "libavutil/avassert.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
@@ -36,24 +36,27 @@
#include "swscale.h"
#include "swscale_internal.h"
-DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_4)[][8] = {
{ 1, 3, 1, 3, 1, 3, 1, 3, },
{ 2, 0, 2, 0, 2, 0, 2, 0, },
+{ 1, 3, 1, 3, 1, 3, 1, 3, },
};
-DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_8)[][8] = {
{ 6, 2, 6, 2, 6, 2, 6, 2, },
{ 0, 4, 0, 4, 0, 4, 0, 4, },
+{ 6, 2, 6, 2, 6, 2, 6, 2, },
};
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[4][8] = {
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[][8] = {
{ 8, 4, 11, 7, 8, 4, 11, 7, },
{ 2, 14, 1, 13, 2, 14, 1, 13, },
{ 10, 6, 9, 5, 10, 6, 9, 5, },
{ 0, 12, 3, 15, 0, 12, 3, 15, },
+{ 8, 4, 11, 7, 8, 4, 11, 7, },
};
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[8][8] = {
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[][8] = {
{ 17, 9, 23, 15, 16, 8, 22, 14, },
{ 5, 29, 3, 27, 4, 28, 2, 26, },
{ 21, 13, 19, 11, 20, 12, 18, 10, },
@@ -62,9 +65,10 @@ DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[8][8] = {
{ 4, 28, 2, 26, 5, 29, 3, 27, },
{ 20, 12, 18, 10, 21, 13, 19, 11, },
{ 1, 25, 7, 31, 0, 24, 6, 30, },
+{ 17, 9, 23, 15, 16, 8, 22, 14, },
};
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[8][8] = {
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[][8] = {
{ 0, 55, 14, 68, 3, 58, 17, 72, },
{ 37, 18, 50, 32, 40, 22, 54, 35, },
{ 9, 64, 5, 59, 13, 67, 8, 63, },
@@ -73,10 +77,11 @@ DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[8][8] = {
{ 39, 21, 52, 34, 38, 19, 51, 33, },
{ 11, 66, 7, 62, 10, 65, 6, 60, },
{ 48, 30, 43, 25, 47, 29, 42, 24, },
+{ 0, 55, 14, 68, 3, 58, 17, 72, },
};
#if 1
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
{117, 62, 158, 103, 113, 58, 155, 100, },
{ 34, 199, 21, 186, 31, 196, 17, 182, },
{144, 89, 131, 76, 141, 86, 127, 72, },
@@ -85,10 +90,11 @@ DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
{ 28, 193, 14, 179, 38, 203, 24, 189, },
{138, 83, 124, 69, 148, 93, 134, 79, },
{ 7, 172, 48, 213, 3, 168, 45, 210, },
+{117, 62, 158, 103, 113, 58, 155, 100, },
};
#elif 1
// tries to correct a gamma of 1.5
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
{ 0, 143, 18, 200, 2, 156, 25, 215, },
{ 78, 28, 125, 64, 89, 36, 138, 74, },
{ 10, 180, 3, 161, 16, 195, 8, 175, },
@@ -97,10 +103,11 @@ DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
{ 85, 33, 134, 71, 81, 30, 130, 67, },
{ 14, 190, 6, 171, 12, 185, 5, 166, },
{117, 57, 101, 44, 113, 54, 97, 41, },
+{ 0, 143, 18, 200, 2, 156, 25, 215, },
};
#elif 1
// tries to correct a gamma of 2.0
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
{ 0, 124, 8, 193, 0, 140, 12, 213, },
{ 55, 14, 104, 42, 66, 19, 119, 52, },
{ 3, 168, 1, 145, 6, 187, 3, 162, },
@@ -109,10 +116,11 @@ DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
{ 62, 17, 114, 48, 58, 16, 109, 45, },
{ 5, 181, 2, 157, 4, 175, 1, 151, },
{ 95, 36, 78, 26, 90, 34, 74, 24, },
+{ 0, 124, 8, 193, 0, 140, 12, 213, },
};
#else
// tries to correct a gamma of 2.5
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
{ 0, 107, 3, 187, 0, 125, 6, 212, },
{ 39, 7, 86, 28, 49, 11, 102, 36, },
{ 1, 158, 0, 131, 3, 180, 1, 151, },
@@ -121,6 +129,7 @@ DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[8][8] = {
{ 45, 9, 96, 33, 42, 8, 91, 30, },
{ 2, 172, 1, 144, 2, 165, 0, 137, },
{ 77, 23, 60, 15, 72, 21, 56, 14, },
+{ 0, 107, 3, 187, 0, 125, 6, 212, },
};
#endif
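Editor's note: every matrix above gained a duplicated first row and lost its fixed outer dimension. An assumption about the intent (not stated in the patch): the extra row lets callers fetch two consecutive dither rows with a single masked index, with no second wrap-around mask:

#include <stdint.h>

extern const uint8_t ff_dither_4x4_16[][8];   /* as defined above */

static void fetch_rows(int y, const uint8_t **cur, const uint8_t **next)
{
    *cur  = ff_dither_4x4_16[ y & 3     ];    /* current row */
    *next = ff_dither_4x4_16[(y & 3) + 1];    /* safe only because row 0 is
                                                 repeated at the end */
}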
@@ -136,7 +145,8 @@ yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
int big_endian, int output_bits)
{
int i;
- int shift = 19 - output_bits;
+ int shift = 3;
+ av_assert0(output_bits == 16);
for (i = 0; i < dstW; i++) {
int val = src[i] + (1 << (shift - 1));
@@ -150,10 +160,11 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
int big_endian, int output_bits)
{
int i;
- int shift = 15 + 16 - output_bits;
+ int shift = 15;
+ av_assert0(output_bits == 16);
for (i = 0; i < dstW; i++) {
- int val = 1 << (30-output_bits);
+ int val = 1 << (shift - 1);
int j;
/* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
@@ -163,7 +174,7 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
* reasonable filterSize), and re-add that at the end. */
val -= 0x40000000;
for (j = 0; j < filterSize; j++)
- val += src[j][i] * filter[j];
+ val += src[j][i] * (unsigned)filter[j];
output_pixel(&dest[i], val, 0x8000, int);
}
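The (unsigned) cast on the filter coefficient is not cosmetic: with plain int operands the multiply-accumulate can overflow, which is undefined behavior for signed arithmetic, while promoting one operand to unsigned makes the whole expression wrap modulo 2^32, which is exactly what the +/-0x40000000 bias trick relies on. A minimal demonstration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int32_t sample = 0x7fffffff;     /* worst-case accumulator input */
    int16_t coeff  = 3;
    /* sample * coeff in signed arithmetic would overflow (UB);
     * with the cast the multiplication wraps, well defined: */
    uint32_t wrapped = sample * (unsigned)coeff;
    printf("%u\n", wrapped);         /* (3 * 0x7fffffff) mod 2^32 */
    return 0;
}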
@@ -200,7 +211,7 @@ yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
int shift = 11 + 16 - output_bits;
for (i = 0; i < dstW; i++) {
- int val = 1 << (26-output_bits);
+ int val = 1 << (shift - 1);
int j;
for (j = 0; j < filterSize; j++)
@@ -232,6 +243,10 @@ yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
+yuv2NBPS(12, BE, 1, 10, int16_t)
+yuv2NBPS(12, LE, 0, 10, int16_t)
+yuv2NBPS(14, BE, 1, 10, int16_t)
+yuv2NBPS(14, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
@@ -317,6 +332,7 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
const uint8_t * const d128 = ff_dither_8x8_220[y&7];
int i;
unsigned acc = 0;
+ int err = 0;
for (i = 0; i < dstW; i += 2) {
int j;
@@ -333,12 +349,25 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
Y1 = av_clip_uint8(Y1);
Y2 = av_clip_uint8(Y2);
}
- accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
- accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
+ if (c->dither == SWS_DITHER_ED) {
+ Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
+ c->dither_error[0][i] = err;
+ acc = 2*acc + (Y1 >= 128);
+ Y1 -= 220*(acc&1);
+
+ err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4);
+ c->dither_error[0][i+1] = Y1;
+ acc = 2*acc + (err >= 128);
+ err -= 220*(acc&1);
+ } else {
+ accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
+ accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
+ }
if ((i & 7) == 6) {
output_pixel(*dest++, acc);
}
}
+ c->dither_error[0][i] = err;
if (i & 6) {
output_pixel(*dest, acc);
@@ -357,6 +386,29 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
int yalpha1 = 4096 - yalpha;
int i;
+ if (c->dither == SWS_DITHER_ED) {
+ int err = 0;
+ int acc = 0;
+ for (i = 0; i < dstW; i +=2) {
+ int Y;
+
+ Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
+ Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
+ c->dither_error[0][i] = err;
+ acc = 2*acc + (Y >= 128);
+ Y -= 220*(acc&1);
+
+ err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
+ err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
+ c->dither_error[0][i+1] = Y;
+ acc = 2*acc + (err >= 128);
+ err -= 220*(acc&1);
+
+ if ((i & 7) == 6)
+ output_pixel(*dest++, acc);
+ }
+ c->dither_error[0][i] = err;
+ } else {
for (i = 0; i < dstW; i += 8) {
int Y, acc = 0;
@@ -379,6 +431,7 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
output_pixel(*dest++, acc);
}
+ }
}
static av_always_inline void
@@ -390,20 +443,43 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
int i;
+ if (c->dither == SWS_DITHER_ED) {
+ int err = 0;
+ int acc = 0;
+ for (i = 0; i < dstW; i +=2) {
+ int Y;
+
+ Y = ((buf0[i + 0] + 64) >> 7);
+ Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
+ c->dither_error[0][i] = err;
+ acc = 2*acc + (Y >= 128);
+ Y -= 220*(acc&1);
+
+ err = ((buf0[i + 1] + 64) >> 7);
+ err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
+ c->dither_error[0][i+1] = Y;
+ acc = 2*acc + (err >= 128);
+ err -= 220*(acc&1);
+
+ if ((i & 7) == 6)
+ output_pixel(*dest++, acc);
+ }
+ c->dither_error[0][i] = err;
+ } else {
for (i = 0; i < dstW; i += 8) {
int acc = 0;
-
- accumulate_bit(acc, (buf0[i + 0] >> 7) + d128[0]);
- accumulate_bit(acc, (buf0[i + 1] >> 7) + d128[1]);
- accumulate_bit(acc, (buf0[i + 2] >> 7) + d128[2]);
- accumulate_bit(acc, (buf0[i + 3] >> 7) + d128[3]);
- accumulate_bit(acc, (buf0[i + 4] >> 7) + d128[4]);
- accumulate_bit(acc, (buf0[i + 5] >> 7) + d128[5]);
- accumulate_bit(acc, (buf0[i + 6] >> 7) + d128[6]);
- accumulate_bit(acc, (buf0[i + 7] >> 7) + d128[7]);
+ accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]);
+ accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]);
+ accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]);
+ accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]);
+ accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]);
+ accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]);
+ accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]);
+ accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]);
output_pixel(*dest++, acc);
}
+ }
}
#undef output_pixel
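The SWS_DITHER_ED branches added above implement Floyd-Steinberg error diffusion for 1-bit output: the 7/16, 1/16, 5/16, 3/16 weights and the one-row dither_error buffer are visible in the index pattern. A self-contained sketch of the same scheme on plain 8-bit samples (hypothetical helper, not part of the patch):

#include <stdint.h>

/* Quantize one row to black/white, diffusing the error as the code above
 * does: 7/16 to the right neighbour, 1-5-3/16 into the next row via a
 * one-row error buffer (errbuf holds w+2 entries, zeroed before row 0). */
static void fs_dither_row(uint8_t *row, int w, int *errbuf)
{
    int x, err = 0;
    for (x = 0; x < w; x++) {
        int v = row[x]
              + ((7*err + 1*errbuf[x] + 5*errbuf[x+1] + 3*errbuf[x+2] + 8) >> 4);
        int q = v >= 128 ? 255 : 0;
        errbuf[x] = err;   /* becomes the "row above" error on the next line */
        err      = v - q;  /* carried to the right-hand neighbour */
        row[x]   = q;
    }
    errbuf[w] = err;
}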
@@ -521,10 +597,12 @@ yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
+ if ((Y1 | Y2 | U | V) & 0x100) {
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+ }
output_pixels(i * 4, Y1, U, Y2, V);
}
@@ -541,10 +619,17 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) {
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = buf0[i * 2] >> 7;
- int Y2 = buf0[i * 2 + 1] >> 7;
- int U = ubuf0[i] >> 7;
- int V = vbuf0[i] >> 7;
+ int Y1 = (buf0[i * 2 ]+64) >> 7;
+ int Y2 = (buf0[i * 2 + 1]+64) >> 7;
+ int U = (ubuf0[i] +64) >> 7;
+ int V = (vbuf0[i] +64) >> 7;
+
+ if ((Y1 | Y2 | U | V) & 0x100) {
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+ }
Y1 = av_clip_uint8(Y1);
Y2 = av_clip_uint8(Y2);
@@ -556,10 +641,17 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
} else {
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = buf0[i * 2] >> 7;
- int Y2 = buf0[i * 2 + 1] >> 7;
- int U = (ubuf0[i] + ubuf1[i]) >> 8;
- int V = (vbuf0[i] + vbuf1[i]) >> 8;
+ int Y1 = (buf0[i * 2 ] + 64) >> 7;
+ int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
+ int U = (ubuf0[i] + ubuf1[i]+128) >> 8;
+ int V = (vbuf0[i] + vbuf1[i]+128) >> 8;
+
+ if ((Y1 | Y2 | U | V) & 0x100) {
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+ }
Y1 = av_clip_uint8(Y1);
Y2 = av_clip_uint8(Y2);
@@ -577,8 +669,8 @@ YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, AV_PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, yvyu422, AV_PIX_FMT_YVYU422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
-#define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE) ? R : B)
-#define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE) ? B : R)
+#define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
+#define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
#define output_pixel(pos, val) \
if (isBE(target)) { \
AV_WB16(pos, val); \
@@ -587,12 +679,231 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
}
static av_always_inline void
+yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
+ const int32_t **lumSrc, int lumFilterSize,
+ const int16_t *chrFilter, const int32_t **chrUSrc,
+ const int32_t **chrVSrc, int chrFilterSize,
+ const int32_t **alpSrc, uint16_t *dest, int dstW,
+ int y, enum AVPixelFormat target, int hasAlpha)
+{
+ int i;
+ int A1 = 0xffff<<14, A2 = 0xffff<<14;
+
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
+ int j;
+ int Y1 = -0x40000000;
+ int Y2 = -0x40000000;
+ int U = -128 << 23; // 19
+ int V = -128 << 23;
+ int R, G, B;
+
+ for (j = 0; j < lumFilterSize; j++) {
+ Y1 += lumSrc[j][i * 2] * (unsigned)lumFilter[j];
+ Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
+ }
+ for (j = 0; j < chrFilterSize; j++) {
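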
+ U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+ V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+ }
+
+ if (hasAlpha) {
+ A1 = -0x40000000;
+ A2 = -0x40000000;
+ for (j = 0; j < lumFilterSize; j++) {
+ A1 += alpSrc[j][i * 2] * (unsigned)lumFilter[j];
+ A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
+ }
+ A1 >>= 1;
+ A1 += 0x20002000;
+ A2 >>= 1;
+ A2 += 0x20002000;
+ }
+
+ // 8bit: 12+15=27; 16-bit: 12+19=31
+ Y1 >>= 14; // 10
+ Y1 += 0x10000;
+ Y2 >>= 14;
+ Y2 += 0x10000;
+ U >>= 14;
+ V >>= 14;
+
+ // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
+ Y1 -= c->yuv2rgb_y_offset;
+ Y2 -= c->yuv2rgb_y_offset;
+ Y1 *= c->yuv2rgb_y_coeff;
+ Y2 *= c->yuv2rgb_y_coeff;
+ Y1 += 1 << 13; // 21
+ Y2 += 1 << 13;
+ // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
+
+ R = V * c->yuv2rgb_v2r_coeff;
+ G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+ B = U * c->yuv2rgb_u2b_coeff;
+
+ // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
+ output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
+ output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
+ output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
+ output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
+ output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
+ output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14);
+ output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
+ output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
+ dest += 8;
+ }
+}
+
+static av_always_inline void
+yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
+ const int32_t *ubuf[2], const int32_t *vbuf[2],
+ const int32_t *abuf[2], uint16_t *dest, int dstW,
+ int yalpha, int uvalpha, int y,
+ enum AVPixelFormat target, int hasAlpha)
+{
+ const int32_t *buf0 = buf[0], *buf1 = buf[1],
+ *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
+ *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
+ *abuf0 = hasAlpha ? abuf[0] : NULL,
+ *abuf1 = hasAlpha ? abuf[1] : NULL;
+ int yalpha1 = 4096 - yalpha;
+ int uvalpha1 = 4096 - uvalpha;
+ int i;
+ int A1 = 0xffff<<14, A2 = 0xffff<<14;
+
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
+ int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
+ int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
+ int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
+ int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
+ int R, G, B;
+
+ Y1 -= c->yuv2rgb_y_offset;
+ Y2 -= c->yuv2rgb_y_offset;
+ Y1 *= c->yuv2rgb_y_coeff;
+ Y2 *= c->yuv2rgb_y_coeff;
+ Y1 += 1 << 13;
+ Y2 += 1 << 13;
+
+ R = V * c->yuv2rgb_v2r_coeff;
+ G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+ B = U * c->yuv2rgb_u2b_coeff;
+
+ if (hasAlpha) {
+ A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 1;
+ A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 1;
+
+ A1 += 1 << 13;
+ A2 += 1 << 13;
+ }
+
+ output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
+ output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
+ output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
+ output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
+ output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
+ output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14);
+ output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
+ output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
+ dest += 8;
+ }
+}
+
+static av_always_inline void
+yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
+ const int32_t *ubuf[2], const int32_t *vbuf[2],
+ const int32_t *abuf0, uint16_t *dest, int dstW,
+ int uvalpha, int y, enum AVPixelFormat target, int hasAlpha)
+{
+ const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+ int i;
+ int A1 = 0xffff<<14, A2= 0xffff<<14;
+
+ if (uvalpha < 2048) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
+ int Y1 = (buf0[i * 2] ) >> 2;
+ int Y2 = (buf0[i * 2 + 1]) >> 2;
+ int U = (ubuf0[i] + (-128 << 11)) >> 2;
+ int V = (vbuf0[i] + (-128 << 11)) >> 2;
+ int R, G, B;
+
+ Y1 -= c->yuv2rgb_y_offset;
+ Y2 -= c->yuv2rgb_y_offset;
+ Y1 *= c->yuv2rgb_y_coeff;
+ Y2 *= c->yuv2rgb_y_coeff;
+ Y1 += 1 << 13;
+ Y2 += 1 << 13;
+
+ if (hasAlpha) {
+ A1 = abuf0[i * 2 ] << 11;
+ A2 = abuf0[i * 2 + 1] << 11;
+
+ A1 += 1 << 13;
+ A2 += 1 << 13;
+ }
+
+ R = V * c->yuv2rgb_v2r_coeff;
+ G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+ B = U * c->yuv2rgb_u2b_coeff;
+
+ output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
+ output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
+ output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
+ output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
+ output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
+ output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14);
+ output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
+ output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
+ dest += 8;
+ }
+ } else {
+ const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
+ int A1 = 0xffff<<14, A2 = 0xffff<<14;
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
+ int Y1 = (buf0[i * 2] ) >> 2;
+ int Y2 = (buf0[i * 2 + 1]) >> 2;
+ int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
+ int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
+ int R, G, B;
+
+ Y1 -= c->yuv2rgb_y_offset;
+ Y2 -= c->yuv2rgb_y_offset;
+ Y1 *= c->yuv2rgb_y_coeff;
+ Y2 *= c->yuv2rgb_y_coeff;
+ Y1 += 1 << 13;
+ Y2 += 1 << 13;
+
+ if (hasAlpha) {
+ A1 = abuf0[i * 2 ] << 11;
+ A2 = abuf0[i * 2 + 1] << 11;
+
+ A1 += 1 << 13;
+ A2 += 1 << 13;
+ }
+
+ R = V * c->yuv2rgb_v2r_coeff;
+ G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
+ B = U * c->yuv2rgb_u2b_coeff;
+
+ output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
+ output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
+ output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
+ output_pixel(&dest[3], av_clip_uintp2(A1 , 30) >> 14);
+ output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
+ output_pixel(&dest[5], av_clip_uintp2( G + Y2, 30) >> 14);
+ output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
+ output_pixel(&dest[7], av_clip_uintp2(A2 , 30) >> 14);
+ dest += 8;
+ }
+ }
+}
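All three rgba64 variants funnel into the same store pattern: components arrive with 14 fractional bits, are clamped to 30 bits, and shifted down to the final 16-bit range. A sketch of that last step, mirroring av_clip_uintp2(v, 30) >> 14:

#include <stdint.h>

static inline uint16_t emit16(int v)          /* v: biased 30-bit R/G/B/A sum */
{
    if (v & ~((1 << 30) - 1))                 /* outside [0, 2^30) ... */
        v = v < 0 ? 0 : (1 << 30) - 1;        /* ... saturate (av_clip_uintp2) */
    return (uint16_t)(v >> 14);               /* drop the fraction: 16-bit out */
}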
+
+static av_always_inline void
yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
const int32_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int32_t **chrUSrc,
const int32_t **chrVSrc, int chrFilterSize,
const int32_t **alpSrc, uint16_t *dest, int dstW,
- int y, enum AVPixelFormat target)
+ int y, enum AVPixelFormat target, int hasAlpha)
{
int i;
@@ -605,12 +916,12 @@ yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
int R, G, B;
for (j = 0; j < lumFilterSize; j++) {
- Y1 += lumSrc[j][i * 2] * lumFilter[j];
- Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
+ Y1 += lumSrc[j][i * 2] * (unsigned)lumFilter[j];
+ Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
}
- for (j = 0; j < chrFilterSize; j++) {
- U += chrUSrc[j][i] * chrFilter[j];
- V += chrVSrc[j][i] * chrFilter[j];
+ for (j = 0; j < chrFilterSize; j++) {
+ U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+ V += chrVSrc[j][i] * (unsigned)chrFilter[j];
}
// 8bit: 12+15=27; 16-bit: 12+19=31
@@ -650,7 +961,7 @@ yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
const int32_t *ubuf[2], const int32_t *vbuf[2],
const int32_t *abuf[2], uint16_t *dest, int dstW,
int yalpha, int uvalpha, int y,
- enum AVPixelFormat target)
+ enum AVPixelFormat target, int hasAlpha)
{
const int32_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
@@ -691,7 +1002,7 @@ static av_always_inline void
yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
const int32_t *ubuf[2], const int32_t *vbuf[2],
const int32_t *abuf0, uint16_t *dest, int dstW,
- int uvalpha, int y, enum AVPixelFormat target)
+ int uvalpha, int y, enum AVPixelFormat target, int hasAlpha)
{
const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
int i;
@@ -758,7 +1069,7 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
#undef r_b
#undef b_r
-#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
+#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
const int16_t **_lumSrc, int lumFilterSize, \
const int16_t *chrFilter, const int16_t **_chrUSrc, \
@@ -773,7 +1084,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
uint16_t *dest = (uint16_t *) _dest; \
name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
- alpSrc, dest, dstW, y, fmt); \
+ alpSrc, dest, dstW, y, fmt, hasAlpha); \
} \
\
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
@@ -787,7 +1098,7 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
**abuf = (const int32_t **) _abuf; \
uint16_t *dest = (uint16_t *) _dest; \
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
- dest, dstW, yalpha, uvalpha, y, fmt); \
+ dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
} \
\
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
@@ -801,13 +1112,21 @@ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
*abuf0 = (const int32_t *) _abuf0; \
uint16_t *dest = (uint16_t *) _dest; \
name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
- dstW, uvalpha, y, fmt); \
+ dstW, uvalpha, y, fmt, hasAlpha); \
}
-YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, AV_PIX_FMT_RGB48BE)
-YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, AV_PIX_FMT_RGB48LE)
-YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, AV_PIX_FMT_BGR48BE)
-YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, AV_PIX_FMT_BGR48LE)
+YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, AV_PIX_FMT_RGB48BE, 0)
+YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, AV_PIX_FMT_RGB48LE, 0)
+YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, AV_PIX_FMT_BGR48BE, 0)
+YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, AV_PIX_FMT_BGR48LE, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0)
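The rgbx/bgrx instantiations reuse the RGBA64 templates with hasAlpha == 0, so A1/A2 keep their 0xffff<<14 initializer and the padding bytes come out fully opaque. A quick check that the store path preserves that:

#include <assert.h>

int main(void)
{
    int a = 0xffff << 14;                  /* 0x3FFFC000, fits in 30 bits */
    assert((a & ~((1 << 30) - 1)) == 0);   /* av_clip_uintp2(a, 30) is a no-op */
    assert((a >> 14) == 0xffff);           /* stored as opaque alpha */
    return 0;
}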
/*
* Write out 2 RGB pixels in the target pixel format. This function takes a
@@ -818,7 +1137,7 @@ YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, AV_PIX_FMT_BGR48LE)
* correct RGB values into the destination buffer.
*/
static av_always_inline void
-yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
+yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
unsigned A1, unsigned A2,
const void *_r, const void *_g, const void *_b, int y,
enum AVPixelFormat target, int hasAlpha)
@@ -839,9 +1158,15 @@ yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
if (hasAlpha) {
int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
+ av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0);
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
} else {
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
+ int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
+
+ av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF);
+#endif
dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
}
@@ -854,6 +1179,7 @@ yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
#define r_b ((target == AV_PIX_FMT_RGB24) ? r : b)
#define b_r ((target == AV_PIX_FMT_RGB24) ? b : r)
+
dest[i * 6 + 0] = r_b[Y1];
dest[i * 6 + 1] = g[Y1];
dest[i * 6 + 2] = b_r[Y1];
@@ -872,19 +1198,19 @@ yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
int dr1, dg1, db1, dr2, dg2, db2;
if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) {
- dr1 = dither_2x2_8[ y & 1 ][0];
- dg1 = dither_2x2_4[ y & 1 ][0];
- db1 = dither_2x2_8[(y & 1) ^ 1][0];
- dr2 = dither_2x2_8[ y & 1 ][1];
- dg2 = dither_2x2_4[ y & 1 ][1];
- db2 = dither_2x2_8[(y & 1) ^ 1][1];
+ dr1 = ff_dither_2x2_8[ y & 1 ][0];
+ dg1 = ff_dither_2x2_4[ y & 1 ][0];
+ db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
+ dr2 = ff_dither_2x2_8[ y & 1 ][1];
+ dg2 = ff_dither_2x2_4[ y & 1 ][1];
+ db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
} else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) {
- dr1 = dither_2x2_8[ y & 1 ][0];
- dg1 = dither_2x2_8[ y & 1 ][1];
- db1 = dither_2x2_8[(y & 1) ^ 1][0];
- dr2 = dither_2x2_8[ y & 1 ][1];
- dg2 = dither_2x2_8[ y & 1 ][0];
- db2 = dither_2x2_8[(y & 1) ^ 1][1];
+ dr1 = ff_dither_2x2_8[ y & 1 ][0];
+ dg1 = ff_dither_2x2_8[ y & 1 ][1];
+ db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
+ dr2 = ff_dither_2x2_8[ y & 1 ][1];
+ dg2 = ff_dither_2x2_8[ y & 1 ][0];
+ db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
} else {
dr1 = ff_dither_4x4_16[ y & 3 ][0];
dg1 = ff_dither_4x4_16[ y & 3 ][1];
@@ -959,12 +1285,6 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
Y2 >>= 19;
U >>= 19;
V >>= 19;
- if ((Y1 | Y2 | U | V) & 0x100) {
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
- }
if (hasAlpha) {
A1 = 1 << 18;
A2 = 1 << 18;
@@ -980,10 +1300,9 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
}
}
- /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
- r = c->table_rV[V];
- g = (c->table_gU[U] + c->table_gV[V]);
- b = c->table_bU[U];
+ r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
+ g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
+ b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
r, g, b, y, target, hasAlpha);
@@ -1012,16 +1331,9 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
int A1, A2;
- const void *r, *g, *b;
-
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
-
- r = c->table_rV[V];
- g = (c->table_gU[U] + c->table_gV[V]);
- b = c->table_bU[U];
+ const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
+ *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
+ *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
if (hasAlpha) {
A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
@@ -1047,25 +1359,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) {
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = buf0[i * 2] >> 7;
- int Y2 = buf0[i * 2 + 1] >> 7;
- int U = ubuf0[i] >> 7;
- int V = vbuf0[i] >> 7;
+ int Y1 = (buf0[i * 2 ] + 64) >> 7;
+ int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
+ int U = (ubuf0[i] + 64) >> 7;
+ int V = (vbuf0[i] + 64) >> 7;
int A1, A2;
- const void *r, *g, *b;
-
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
-
- r = c->table_rV[V];
- g = (c->table_gU[U] + c->table_gV[V]);
- b = c->table_bU[U];
+ const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
+ *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
+ *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
if (hasAlpha) {
- A1 = abuf0[i * 2 ] >> 7;
- A2 = abuf0[i * 2 + 1] >> 7;
+ A1 = abuf0[i * 2 ] * 255 + 16384 >> 15;
+ A2 = abuf0[i * 2 + 1] * 255 + 16384 >> 15;
A1 = av_clip_uint8(A1);
A2 = av_clip_uint8(A2);
}
@@ -1076,25 +1381,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
} else {
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = buf0[i * 2] >> 7;
- int Y2 = buf0[i * 2 + 1] >> 7;
- int U = (ubuf0[i] + ubuf1[i]) >> 8;
- int V = (vbuf0[i] + vbuf1[i]) >> 8;
+ int Y1 = (buf0[i * 2 ] + 64) >> 7;
+ int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
+ int U = (ubuf0[i] + ubuf1[i] + 128) >> 8;
+ int V = (vbuf0[i] + vbuf1[i] + 128) >> 8;
int A1, A2;
- const void *r, *g, *b;
-
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
-
- r = c->table_rV[V];
- g = (c->table_gU[U] + c->table_gV[V]);
- b = c->table_bU[U];
+ const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
+ *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
+ *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
if (hasAlpha) {
- A1 = abuf0[i * 2 ] >> 7;
- A2 = abuf0[i * 2 + 1] >> 7;
+ A1 = (abuf0[i * 2 ] + 64) >> 7;
+ A2 = (abuf0[i * 2 + 1] + 64) >> 7;
A1 = av_clip_uint8(A1);
A2 = av_clip_uint8(A2);
}
@@ -1117,7 +1415,8 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
alpSrc, dest, dstW, y, fmt, hasAlpha); \
}
-#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
+
+#define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
const int16_t *ubuf[2], const int16_t *vbuf[2], \
@@ -1126,8 +1425,10 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
{ \
name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
-} \
- \
+}
+
+#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
+YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
const int16_t *ubuf[2], const int16_t *vbuf[2], \
const int16_t *abuf0, uint8_t *dest, int dstW, \
@@ -1157,6 +1458,145 @@ YUV2RGBWRAPPER(yuv2rgb,, 8, AV_PIX_FMT_RGB8, 0)
YUV2RGBWRAPPER(yuv2rgb,, 4, AV_PIX_FMT_RGB4, 0)
YUV2RGBWRAPPER(yuv2rgb,, 4b, AV_PIX_FMT_RGB4_BYTE, 0)
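The table lookups above switched from clipping U/V to indexing with a YUVRGB_TABLE_HEADROOM offset: the tables are built with saturated entries padding both ends, so mildly out-of-range chroma indexes valid memory and the per-pixel clip disappears. A generic sketch of the padded-LUT idea (the HEADROOM value here is hypothetical):

#include <stdint.h>

#define HEADROOM 128                         /* illustrative, not the real value */
static uint8_t lut[256 + 2 * HEADROOM];

static void build_lut(void)
{
    int i;
    for (i = -HEADROOM; i < 256 + HEADROOM; i++)       /* saturate in the pads */
        lut[i + HEADROOM] = i < 0 ? 0 : i > 255 ? 255 : i;
}

static uint8_t lookup(int v)                 /* v may stray outside [0,255] */
{
    return lut[v + HEADROOM];                /* no clip in the hot loop */
}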
+static av_always_inline void yuv2rgb_write_full(SwsContext *c,
+ uint8_t *dest, int i, int Y, int A, int U, int V,
+ int y, enum AVPixelFormat target, int hasAlpha, int err[4])
+{
+ int R, G, B;
+ int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8;
+
+ Y -= c->yuv2rgb_y_offset;
+ Y *= c->yuv2rgb_y_coeff;
+ Y += 1 << 21;
+ R = Y + V*c->yuv2rgb_v2r_coeff;
+ G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
+ B = Y + U*c->yuv2rgb_u2b_coeff;
+ if ((R | G | B) & 0xC0000000) {
+ R = av_clip_uintp2(R, 30);
+ G = av_clip_uintp2(G, 30);
+ B = av_clip_uintp2(B, 30);
+ }
+
+ switch(target) {
+ case AV_PIX_FMT_ARGB:
+ dest[0] = hasAlpha ? A : 255;
+ dest[1] = R >> 22;
+ dest[2] = G >> 22;
+ dest[3] = B >> 22;
+ break;
+ case AV_PIX_FMT_RGB24:
+ dest[0] = R >> 22;
+ dest[1] = G >> 22;
+ dest[2] = B >> 22;
+ break;
+ case AV_PIX_FMT_RGBA:
+ dest[0] = R >> 22;
+ dest[1] = G >> 22;
+ dest[2] = B >> 22;
+ dest[3] = hasAlpha ? A : 255;
+ break;
+ case AV_PIX_FMT_ABGR:
+ dest[0] = hasAlpha ? A : 255;
+ dest[1] = B >> 22;
+ dest[2] = G >> 22;
+ dest[3] = R >> 22;
+ break;
+ case AV_PIX_FMT_BGR24:
+ dest[0] = B >> 22;
+ dest[1] = G >> 22;
+ dest[2] = R >> 22;
+ break;
+ case AV_PIX_FMT_BGRA:
+ dest[0] = B >> 22;
+ dest[1] = G >> 22;
+ dest[2] = R >> 22;
+ dest[3] = hasAlpha ? A : 255;
+ break;
+ case AV_PIX_FMT_BGR4_BYTE:
+ case AV_PIX_FMT_RGB4_BYTE:
+ case AV_PIX_FMT_BGR8:
+ case AV_PIX_FMT_RGB8:
+ {
+ int r,g,b;
+
+ switch (c->dither) {
+ default:
+ case SWS_DITHER_AUTO:
+ case SWS_DITHER_ED:
+ R >>= 22;
+ G >>= 22;
+ B >>= 22;
+ R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4;
+ G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4;
+ B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4;
+ c->dither_error[0][i] = err[0];
+ c->dither_error[1][i] = err[1];
+ c->dither_error[2][i] = err[2];
+ r = R >> (isrgb8 ? 5 : 7);
+ g = G >> (isrgb8 ? 5 : 6);
+ b = B >> (isrgb8 ? 6 : 7);
+ r = av_clip(r, 0, isrgb8 ? 7 : 1);
+ g = av_clip(g, 0, isrgb8 ? 7 : 3);
+ b = av_clip(b, 0, isrgb8 ? 3 : 1);
+ err[0] = R - r*(isrgb8 ? 36 : 255);
+ err[1] = G - g*(isrgb8 ? 36 : 85);
+ err[2] = B - b*(isrgb8 ? 85 : 255);
+ break;
+ case SWS_DITHER_A_DITHER:
+ if (isrgb8) {
+ /* see http://pippin.gimp.org/a_dither/ for details/origin */
+#define A_DITHER(u,v) (((((u)+((v)*236))*119)&0xff))
+ r = (((R >> 19) + A_DITHER(i,y) -96)>>8);
+ g = (((G >> 19) + A_DITHER(i + 17,y) - 96)>>8);
+ b = (((B >> 20) + A_DITHER(i + 17*2,y) -96)>>8);
+ r = av_clip(r, 0, 7);
+ g = av_clip(g, 0, 7);
+ b = av_clip(b, 0, 3);
+ } else {
+ r = (((R >> 21) + A_DITHER(i,y)-256)>>8);
+ g = (((G >> 19) + A_DITHER(i + 17,y)-256)>>8);
+ b = (((B >> 21) + A_DITHER(i + 17*2,y)-256)>>8);
+ r = av_clip(r, 0, 1);
+ g = av_clip(g, 0, 3);
+ b = av_clip(b, 0, 1);
+ }
+ break;
+ case SWS_DITHER_X_DITHER:
+ if (isrgb8) {
+ /* see http://pippin.gimp.org/a_dither/ for details/origin */
+#define X_DITHER(u,v) (((((u)^((v)*237))*181)&0x1ff)/2)
+ r = (((R >> 19) + X_DITHER(i,y) - 96)>>8);
+ g = (((G >> 19) + X_DITHER(i + 17,y) - 96)>>8);
+ b = (((B >> 20) + X_DITHER(i + 17*2,y) - 96)>>8);
+ r = av_clip(r, 0, 7);
+ g = av_clip(g, 0, 7);
+ b = av_clip(b, 0, 3);
+ } else {
+ r = (((R >> 21) + X_DITHER(i,y)-256)>>8);
+ g = (((G >> 19) + X_DITHER(i + 17,y)-256)>>8);
+ b = (((B >> 21) + X_DITHER(i + 17*2,y)-256)>>8);
+ r = av_clip(r, 0, 1);
+ g = av_clip(g, 0, 3);
+ b = av_clip(b, 0, 1);
+ }
+
+ break;
+ }
+
+ if(target == AV_PIX_FMT_BGR4_BYTE) {
+ dest[0] = r + 2*g + 8*b;
+ } else if(target == AV_PIX_FMT_RGB4_BYTE) {
+ dest[0] = b + 2*g + 8*r;
+ } else if(target == AV_PIX_FMT_BGR8) {
+ dest[0] = r + 8*g + 64*b;
+ } else if(target == AV_PIX_FMT_RGB8) {
+ dest[0] = b + 4*g + 32*r;
+ } else
+ av_assert2(0);
+ break;}
+ }
+}
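The a_dither/x_dither branches generate their threshold from a positional hash instead of a stored matrix. Here is the generator in isolation (same macro as above; see the linked page for its derivation):

#include <stdio.h>

#define A_DITHER(u, v) (((((u) + ((v) * 236)) * 119) & 0xff))

int main(void)
{
    int x, y;
    for (y = 0; y < 4; y++) {                /* print a corner of the pattern */
        for (x = 0; x < 4; x++)
            printf("%4d", A_DITHER(x, y));
        printf("\n");
    }
    return 0;
}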
+
static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
@@ -1167,13 +1607,18 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
{
int i;
int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
+ int err[4] = {0};
+ int A = 0; //init to silence warning
+
+ if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
+ || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8)
+ step = 1;
for (i = 0; i < dstW; i++) {
int j;
- int Y = 0;
- int U = -128 << 19;
- int V = -128 << 19;
- int R, G, B, A;
+ int Y = 1<<9;
+ int U = (1<<9)-(128 << 19);
+ int V = (1<<9)-(128 << 19);
for (j = 0; j < lumFilterSize; j++) {
Y += lumSrc[j][i] * lumFilter[j];
@@ -1186,7 +1631,7 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
U >>= 10;
V >>= 10;
if (hasAlpha) {
- A = 1 << 21;
+ A = 1 << 18;
for (j = 0; j < lumFilterSize; j++) {
A += alpSrc[j][i] * lumFilter[j];
}
@@ -1194,78 +1639,136 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
if (A & 0x100)
A = av_clip_uint8(A);
}
- Y -= c->yuv2rgb_y_offset;
- Y *= c->yuv2rgb_y_coeff;
- Y += 1 << 21;
- R = Y + V*c->yuv2rgb_v2r_coeff;
- G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
- B = Y + U*c->yuv2rgb_u2b_coeff;
- if ((R | G | B) & 0xC0000000) {
- R = av_clip_uintp2(R, 30);
- G = av_clip_uintp2(G, 30);
- B = av_clip_uintp2(B, 30);
- }
+ yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
+ dest += step;
+ }
+ c->dither_error[0][i] = err[0];
+ c->dither_error[1][i] = err[1];
+ c->dither_error[2][i] = err[2];
+}
- switch(target) {
- case AV_PIX_FMT_ARGB:
- dest[0] = hasAlpha ? A : 255;
- dest[1] = R >> 22;
- dest[2] = G >> 22;
- dest[3] = B >> 22;
- break;
- case AV_PIX_FMT_RGB24:
- dest[0] = R >> 22;
- dest[1] = G >> 22;
- dest[2] = B >> 22;
- break;
- case AV_PIX_FMT_RGBA:
- dest[0] = R >> 22;
- dest[1] = G >> 22;
- dest[2] = B >> 22;
- dest[3] = hasAlpha ? A : 255;
- break;
- case AV_PIX_FMT_ABGR:
- dest[0] = hasAlpha ? A : 255;
- dest[1] = B >> 22;
- dest[2] = G >> 22;
- dest[3] = R >> 22;
- dest += 4;
- break;
- case AV_PIX_FMT_BGR24:
- dest[0] = B >> 22;
- dest[1] = G >> 22;
- dest[2] = R >> 22;
- break;
- case AV_PIX_FMT_BGRA:
- dest[0] = B >> 22;
- dest[1] = G >> 22;
- dest[2] = R >> 22;
- dest[3] = hasAlpha ? A : 255;
- break;
+static av_always_inline void
+yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2],
+ const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf[2], uint8_t *dest, int dstW,
+ int yalpha, int uvalpha, int y,
+ enum AVPixelFormat target, int hasAlpha)
+{
+ const int16_t *buf0 = buf[0], *buf1 = buf[1],
+ *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
+ *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
+ *abuf0 = hasAlpha ? abuf[0] : NULL,
+ *abuf1 = hasAlpha ? abuf[1] : NULL;
+ int yalpha1 = 4096 - yalpha;
+ int uvalpha1 = 4096 - uvalpha;
+ int i;
+ int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
+ int err[4] = {0};
+    int A = 0; // init to silence warning
+
+ if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
+ || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8)
+ step = 1;
+
+ for (i = 0; i < dstW; i++) {
+ int Y = ( buf0[i] * yalpha1 + buf1[i] * yalpha ) >> 10; //FIXME rounding
+ int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha-(128 << 19)) >> 10;
+ int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha-(128 << 19)) >> 10;
+
+ if (hasAlpha) {
+ A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + (1<<18)) >> 19;
+ if (A & 0x100)
+ A = av_clip_uint8(A);
}
+
+ yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
dest += step;
}
+ c->dither_error[0][i] = err[0];
+ c->dither_error[1][i] = err[1];
+ c->dither_error[2][i] = err[2];
+}
+
+static av_always_inline void
+yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0,
+ const int16_t *ubuf[2], const int16_t *vbuf[2],
+ const int16_t *abuf0, uint8_t *dest, int dstW,
+ int uvalpha, int y, enum AVPixelFormat target,
+ int hasAlpha)
+{
+ const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
+ int i;
+ int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
+ int err[4] = {0};
+
+ if( target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
+ || target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8)
+ step = 1;
+
+ if (uvalpha < 2048) {
+ int A = 0; //init to silence warning
+ for (i = 0; i < dstW; i++) {
+ int Y = buf0[i] << 2;
+ int U = (ubuf0[i] - (128<<7)) << 2;
+ int V = (vbuf0[i] - (128<<7)) << 2;
+
+ if (hasAlpha) {
+ A = (abuf0[i] + 64) >> 7;
+ if (A & 0x100)
+ A = av_clip_uint8(A);
+ }
+
+ yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
+ dest += step;
+ }
+ } else {
+ const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
+ int A = 0; //init to silence warning
+ for (i = 0; i < dstW; i++) {
+ int Y = buf0[i] << 2;
+ int U = (ubuf0[i] + ubuf1[i] - (128<<8)) << 1;
+ int V = (vbuf0[i] + vbuf1[i] - (128<<8)) << 1;
+
+ if (hasAlpha) {
+ A = (abuf0[i] + 64) >> 7;
+ if (A & 0x100)
+ A = av_clip_uint8(A);
+ }
+
+ yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
+ dest += step;
+ }
+ }
+
+ c->dither_error[0][i] = err[0];
+ c->dither_error[1][i] = err[1];
+ c->dither_error[2][i] = err[2];
}
#if CONFIG_SMALL
-YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
-YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
+YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
+YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
#else
#if CONFIG_SWSCALE_ALPHA
-YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, 1)
-YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, 1)
-YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, 1)
-YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, 1)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA, 1)
+YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR, 1)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA, 1)
+YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB, 1)
#endif
-YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA, 0)
-YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR, 0)
-YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA, 0)
-YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB, 0)
#endif
-YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, AV_PIX_FMT_BGR24, 0)
-YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, AV_PIX_FMT_RGB24, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full, AV_PIX_FMT_BGR24, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full, AV_PIX_FMT_RGB24, 0)
+
+YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full, AV_PIX_FMT_BGR4_BYTE, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full, AV_PIX_FMT_RGB4_BYTE, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full, AV_PIX_FMT_BGR8, 0)
+YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0)
static void
yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
@@ -1277,16 +1780,17 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
int i;
- int hasAlpha = 0;
+ int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc;
uint16_t **dest16 = (uint16_t**)dest;
int SH = 22 + 7 - desc->comp[0].depth_minus1;
+ int A = 0; // init to silence warning
for (i = 0; i < dstW; i++) {
int j;
int Y = 1 << 9;
int U = (1 << 9) - (128 << 19);
int V = (1 << 9) - (128 << 19);
- int R, G, B, A;
+ int R, G, B;
for (j = 0; j < lumFilterSize; j++)
Y += lumSrc[j][i] * lumFilter[j];
@@ -1329,10 +1833,14 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
dest16[0][i] = G >> SH;
dest16[1][i] = B >> SH;
dest16[2][i] = R >> SH;
+ if (hasAlpha)
+ dest16[3][i] = A;
} else {
dest[0][i] = G >> 22;
dest[1][i] = B >> 22;
dest[2][i] = R >> 22;
+ if (hasAlpha)
+ dest[3][i] = A;
}
}
if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
@@ -1340,6 +1848,8 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
dest16[0][i] = av_bswap16(dest16[0][i]);
dest16[1][i] = av_bswap16(dest16[1][i]);
dest16[2][i] = av_bswap16(dest16[2][i]);
+ if (hasAlpha)
+ dest16[3][i] = av_bswap16(dest16[3][i]);
}
}
}
@@ -1363,10 +1873,17 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
if (desc->comp[0].depth_minus1 == 8) {
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
- } else {
+ } else if (desc->comp[0].depth_minus1 == 9) {
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
- }
+ } else if (desc->comp[0].depth_minus1 == 11) {
+ *yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c : yuv2planeX_12LE_c;
+ *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c : yuv2plane1_12LE_c;
+ } else if (desc->comp[0].depth_minus1 == 13) {
+ *yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_c : yuv2planeX_14LE_c;
+ *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c : yuv2plane1_14LE_c;
+ } else
+ av_assert0(0);
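+        /* depth_minus1 is the component depth minus one, so the chain
+         * above selects the 9-, 10-, 12- and 14-bit planar writers */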
} else {
*yuv2plane1 = yuv2plane1_8_c;
*yuv2planeX = yuv2planeX_8_c;
@@ -1379,77 +1896,189 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
case AV_PIX_FMT_RGBA:
#if CONFIG_SMALL
*yuv2packedX = yuv2rgba32_full_X_c;
+ *yuv2packed2 = yuv2rgba32_full_2_c;
+ *yuv2packed1 = yuv2rgba32_full_1_c;
#else
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packedX = yuv2rgba32_full_X_c;
+ *yuv2packed2 = yuv2rgba32_full_2_c;
+ *yuv2packed1 = yuv2rgba32_full_1_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packedX = yuv2rgbx32_full_X_c;
+ *yuv2packed2 = yuv2rgbx32_full_2_c;
+ *yuv2packed1 = yuv2rgbx32_full_1_c;
}
#endif /* !CONFIG_SMALL */
break;
case AV_PIX_FMT_ARGB:
#if CONFIG_SMALL
*yuv2packedX = yuv2argb32_full_X_c;
+ *yuv2packed2 = yuv2argb32_full_2_c;
+ *yuv2packed1 = yuv2argb32_full_1_c;
#else
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packedX = yuv2argb32_full_X_c;
+ *yuv2packed2 = yuv2argb32_full_2_c;
+ *yuv2packed1 = yuv2argb32_full_1_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packedX = yuv2xrgb32_full_X_c;
+ *yuv2packed2 = yuv2xrgb32_full_2_c;
+ *yuv2packed1 = yuv2xrgb32_full_1_c;
}
#endif /* !CONFIG_SMALL */
break;
case AV_PIX_FMT_BGRA:
#if CONFIG_SMALL
*yuv2packedX = yuv2bgra32_full_X_c;
+ *yuv2packed2 = yuv2bgra32_full_2_c;
+ *yuv2packed1 = yuv2bgra32_full_1_c;
#else
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packedX = yuv2bgra32_full_X_c;
+ *yuv2packed2 = yuv2bgra32_full_2_c;
+ *yuv2packed1 = yuv2bgra32_full_1_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packedX = yuv2bgrx32_full_X_c;
+ *yuv2packed2 = yuv2bgrx32_full_2_c;
+ *yuv2packed1 = yuv2bgrx32_full_1_c;
}
#endif /* !CONFIG_SMALL */
break;
case AV_PIX_FMT_ABGR:
#if CONFIG_SMALL
*yuv2packedX = yuv2abgr32_full_X_c;
+ *yuv2packed2 = yuv2abgr32_full_2_c;
+ *yuv2packed1 = yuv2abgr32_full_1_c;
#else
#if CONFIG_SWSCALE_ALPHA
if (c->alpPixBuf) {
*yuv2packedX = yuv2abgr32_full_X_c;
+ *yuv2packed2 = yuv2abgr32_full_2_c;
+ *yuv2packed1 = yuv2abgr32_full_1_c;
} else
#endif /* CONFIG_SWSCALE_ALPHA */
{
*yuv2packedX = yuv2xbgr32_full_X_c;
+ *yuv2packed2 = yuv2xbgr32_full_2_c;
+ *yuv2packed1 = yuv2xbgr32_full_1_c;
}
#endif /* !CONFIG_SMALL */
break;
case AV_PIX_FMT_RGB24:
*yuv2packedX = yuv2rgb24_full_X_c;
+ *yuv2packed2 = yuv2rgb24_full_2_c;
+ *yuv2packed1 = yuv2rgb24_full_1_c;
break;
case AV_PIX_FMT_BGR24:
*yuv2packedX = yuv2bgr24_full_X_c;
+ *yuv2packed2 = yuv2bgr24_full_2_c;
+ *yuv2packed1 = yuv2bgr24_full_1_c;
+ break;
+ case AV_PIX_FMT_BGR4_BYTE:
+ *yuv2packedX = yuv2bgr4_byte_full_X_c;
+ *yuv2packed2 = yuv2bgr4_byte_full_2_c;
+ *yuv2packed1 = yuv2bgr4_byte_full_1_c;
+ break;
+ case AV_PIX_FMT_RGB4_BYTE:
+ *yuv2packedX = yuv2rgb4_byte_full_X_c;
+ *yuv2packed2 = yuv2rgb4_byte_full_2_c;
+ *yuv2packed1 = yuv2rgb4_byte_full_1_c;
+ break;
+ case AV_PIX_FMT_BGR8:
+ *yuv2packedX = yuv2bgr8_full_X_c;
+ *yuv2packed2 = yuv2bgr8_full_2_c;
+ *yuv2packed1 = yuv2bgr8_full_1_c;
+ break;
+ case AV_PIX_FMT_RGB8:
+ *yuv2packedX = yuv2rgb8_full_X_c;
+ *yuv2packed2 = yuv2rgb8_full_2_c;
+ *yuv2packed1 = yuv2rgb8_full_1_c;
break;
case AV_PIX_FMT_GBRP:
case AV_PIX_FMT_GBRP9BE:
case AV_PIX_FMT_GBRP9LE:
case AV_PIX_FMT_GBRP10BE:
case AV_PIX_FMT_GBRP10LE:
+ case AV_PIX_FMT_GBRP12BE:
+ case AV_PIX_FMT_GBRP12LE:
+ case AV_PIX_FMT_GBRP14BE:
+ case AV_PIX_FMT_GBRP14LE:
case AV_PIX_FMT_GBRP16BE:
case AV_PIX_FMT_GBRP16LE:
+ case AV_PIX_FMT_GBRAP:
*yuv2anyX = yuv2gbrp_full_X_c;
break;
}
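+        /* no full-chroma-resolution writer matched this format, so fall
+         * through to the ordinary packed-output path below */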
+ if (!*yuv2packedX && !*yuv2anyX)
+ goto YUV_PACKED;
} else {
+ YUV_PACKED:
switch (dstFormat) {
+ case AV_PIX_FMT_RGBA64LE:
+#if CONFIG_SWSCALE_ALPHA
+ if (c->alpPixBuf) {
+ *yuv2packed1 = yuv2rgba64le_1_c;
+ *yuv2packed2 = yuv2rgba64le_2_c;
+ *yuv2packedX = yuv2rgba64le_X_c;
+ } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+ {
+ *yuv2packed1 = yuv2rgbx64le_1_c;
+ *yuv2packed2 = yuv2rgbx64le_2_c;
+ *yuv2packedX = yuv2rgbx64le_X_c;
+ }
+ break;
+ case AV_PIX_FMT_RGBA64BE:
+#if CONFIG_SWSCALE_ALPHA
+ if (c->alpPixBuf) {
+ *yuv2packed1 = yuv2rgba64be_1_c;
+ *yuv2packed2 = yuv2rgba64be_2_c;
+ *yuv2packedX = yuv2rgba64be_X_c;
+ } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+ {
+ *yuv2packed1 = yuv2rgbx64be_1_c;
+ *yuv2packed2 = yuv2rgbx64be_2_c;
+ *yuv2packedX = yuv2rgbx64be_X_c;
+ }
+ break;
+ case AV_PIX_FMT_BGRA64LE:
+#if CONFIG_SWSCALE_ALPHA
+ if (c->alpPixBuf) {
+ *yuv2packed1 = yuv2bgra64le_1_c;
+ *yuv2packed2 = yuv2bgra64le_2_c;
+ *yuv2packedX = yuv2bgra64le_X_c;
+ } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+ {
+ *yuv2packed1 = yuv2bgrx64le_1_c;
+ *yuv2packed2 = yuv2bgrx64le_2_c;
+ *yuv2packedX = yuv2bgrx64le_X_c;
+ }
+ break;
+ case AV_PIX_FMT_BGRA64BE:
+#if CONFIG_SWSCALE_ALPHA
+ if (c->alpPixBuf) {
+ *yuv2packed1 = yuv2bgra64be_1_c;
+ *yuv2packed2 = yuv2bgra64be_2_c;
+ *yuv2packedX = yuv2bgra64be_X_c;
+ } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+ {
+ *yuv2packed1 = yuv2bgrx64be_1_c;
+ *yuv2packed2 = yuv2bgrx64be_2_c;
+ *yuv2packedX = yuv2bgrx64be_X_c;
+ }
+ break;
case AV_PIX_FMT_RGB48LE:
*yuv2packed1 = yuv2rgb48le_1_c;
*yuv2packed2 = yuv2rgb48le_2_c;
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 7e00488..86f40ab 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -4,20 +4,20 @@
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
* based on the equivalent C code in swscale.c
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -295,7 +295,7 @@ av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
return;
- if (c->srcBpc == 8 && c->dstBpc <= 10) {
+ if (c->srcBpc == 8 && c->dstBpc <= 14) {
c->hyScale = c->hcScale = hScale_altivec_real;
}
if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 74b0f18..25282bf 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -3,20 +3,20 @@
*
* copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -97,6 +97,7 @@
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
+#include "libavutil/pixdesc.h"
#include "yuv2rgb_altivec.h"
#if HAVE_ALTIVEC
@@ -317,12 +318,7 @@ static int altivec_ ## name(SwsContext *c, const unsigned char **in, \
const ubyte *ui = in[1]; \
const ubyte *vi = in[2]; \
\
- vector unsigned char *oute = \
- (vector unsigned char *) \
- (oplanes[0] + srcSliceY * outstrides[0]); \
- vector unsigned char *outo = \
- (vector unsigned char *) \
- (oplanes[0] + srcSliceY * outstrides[0] + outstrides[0]); \
+ vector unsigned char *oute, *outo; \
\
/* loop moves y{1, 2}i by w */ \
instrides_scl[0] = instrides[0] * 2 - w; \
@@ -332,6 +328,9 @@ static int altivec_ ## name(SwsContext *c, const unsigned char **in, \
instrides_scl[2] = instrides[2] - w / 2; \
\
for (i = 0; i < h / 2; i++) { \
+ oute = (vector unsigned char *)(oplanes[0] + outstrides[0] * \
+ (srcSliceY + i * 2)); \
+ outo = oute + (outstrides[0] >> 4); \
vec_dstst(outo, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 0); \
vec_dstst(oute, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 1); \
\
@@ -429,9 +428,6 @@ static int altivec_ ## name(SwsContext *c, const unsigned char **in, \
vi += 8; \
} \
\
- outo += (outstrides[0]) >> 4; \
- oute += (outstrides[0]) >> 4; \
- \
ui += instrides_scl[1]; \
vi += instrides_scl[2]; \
y1i += instrides_scl[0]; \
@@ -748,7 +744,7 @@ static av_always_inline void yuv2packedX_altivec(SwsContext *c,
if (!printed_error_message) {
av_log(c, AV_LOG_ERROR,
"altivec_yuv2packedX doesn't support %s output\n",
- sws_format_name(c->dstFormat));
+ av_get_pix_fmt_name(c->dstFormat));
printed_error_message = 1;
}
return;
@@ -836,7 +832,7 @@ static av_always_inline void yuv2packedX_altivec(SwsContext *c,
/* Unreachable, I think. */
av_log(c, AV_LOG_ERROR,
"altivec_yuv2packedX doesn't support %s output\n",
- sws_format_name(c->dstFormat));
+ av_get_pix_fmt_name(c->dstFormat));
return;
}
diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h
index 2c5e7ed..aa52a47 100644
--- a/libswscale/ppc/yuv2rgb_altivec.h
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@@ -4,20 +4,20 @@
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
* based on the equivalent C code in swscale.c
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c
index 08545b3..2b1c5dd 100644
--- a/libswscale/ppc/yuv2yuv_altivec.c
+++ b/libswscale/ppc/yuv2yuv_altivec.c
@@ -4,20 +4,20 @@
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
* based on the equivalent C code in swscale.c
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 3fb3921..5b1fcf7 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -6,20 +6,20 @@
* Written by Nick Kurshev.
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -73,10 +73,11 @@ void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst,
uint8_t *udst, uint8_t *vdst,
int width, int height,
int lumStride, int chromStride, int srcStride);
-void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
- uint8_t *udst, uint8_t *vdst,
- int width, int height,
- int lumStride, int chromStride, int srcStride);
+void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
+ uint8_t *udst, uint8_t *vdst,
+ int width, int height,
+ int lumStride, int chromStride, int srcStride,
+ int32_t *rgb2yuv);
void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
int srcStride, int dstStride);
void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
@@ -108,7 +109,6 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
const uint8_t *src, int width, int height,
int lumStride, int chromStride, int srcStride);
-#define RGB2YUV_SHIFT 8
#define BY ((int)( 0.098 * (1 << RGB2YUV_SHIFT) + 0.5))
#define BV ((int)(-0.071 * (1 << RGB2YUV_SHIFT) + 0.5))
#define BU ((int)( 0.439 * (1 << RGB2YUV_SHIFT) + 0.5))
@@ -184,13 +184,13 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0xF800) >> 8;
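+        /* replicate the top bits into the freed low bits so a maximal
+         * component expands to 0xFF rather than 0xF8 */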
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
#else
- *d++ = (bgr & 0xF800) >> 8;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
*d++ = 255;
#endif
}
@@ -223,9 +223,9 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
while (s < end) {
register uint16_t bgr = *s++;
- *d++ = (bgr & 0xF800) >> 8;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
}
}
@@ -259,13 +259,13 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x7C00) >> 7;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
#else
- *d++ = (bgr & 0x7C00) >> 7;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
*d++ = 255;
#endif
}
@@ -279,9 +279,9 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
while (s < end) {
register uint16_t bgr = *s++;
- *d++ = (bgr & 0x7C00) >> 7;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
}
}
@@ -318,18 +318,6 @@ void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size)
}
}
-void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size)
-{
- int i, num_pixels = src_size;
-
- for (i = 0; i < num_pixels; i++) {
- register uint8_t rgb = src[i];
- unsigned r = (rgb & 0x07);
- unsigned g = (rgb & 0x38) >> 3;
- unsigned b = (rgb & 0xC0) >> 6;
- dst[i] = ((b << 1) & 0x07) | ((g & 0x07) << 3) | ((r & 0x03) << 6);
- }
-}
#define DEFINE_SHUFFLE_BYTES(a, b, c, d) \
void shuffle_bytes_ ## a ## b ## c ## d(const uint8_t *src, \
@@ -349,3 +337,57 @@ DEFINE_SHUFFLE_BYTES(0, 3, 2, 1)
DEFINE_SHUFFLE_BYTES(1, 2, 3, 0)
DEFINE_SHUFFLE_BYTES(3, 0, 1, 2)
DEFINE_SHUFFLE_BYTES(3, 2, 1, 0)
+
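+/* bulk repack helpers for 16-bit-per-component RGB; the _bswap variants
+ * additionally swap byte order via av_bswap16() */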
+#define DEFINE_RGB48TOBGR48(need_bswap, swap) \
+void rgb48tobgr48_ ## need_bswap(const uint8_t *src, \
+ uint8_t *dst, int src_size) \
+{ \
+ uint16_t *d = (uint16_t *)dst; \
+ uint16_t *s = (uint16_t *)src; \
+ int i, num_pixels = src_size >> 1; \
+ \
+ for (i = 0; i < num_pixels; i += 3) { \
+ d[i ] = swap ? av_bswap16(s[i + 2]) : s[i + 2]; \
+ d[i + 1] = swap ? av_bswap16(s[i + 1]) : s[i + 1]; \
+ d[i + 2] = swap ? av_bswap16(s[i ]) : s[i ]; \
+ } \
+}
+
+DEFINE_RGB48TOBGR48(nobswap, 0)
+DEFINE_RGB48TOBGR48(bswap, 1)
+
+#define DEFINE_RGB64TOBGR48(need_bswap, swap) \
+void rgb64tobgr48_ ## need_bswap(const uint8_t *src, \
+ uint8_t *dst, int src_size) \
+{ \
+ uint16_t *d = (uint16_t *)dst; \
+ uint16_t *s = (uint16_t *)src; \
+ int i, num_pixels = src_size >> 3; \
+ \
+ for (i = 0; i < num_pixels; i++) { \
+ d[3 * i ] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \
+ d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \
+ d[3 * i + 2] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \
+ } \
+}
+
+DEFINE_RGB64TOBGR48(nobswap, 0)
+DEFINE_RGB64TOBGR48(bswap, 1)
+
+#define DEFINE_RGB64TO48(need_bswap, swap) \
+void rgb64to48_ ## need_bswap(const uint8_t *src, \
+ uint8_t *dst, int src_size) \
+{ \
+ uint16_t *d = (uint16_t *)dst; \
+ uint16_t *s = (uint16_t *)src; \
+ int i, num_pixels = src_size >> 3; \
+ \
+ for (i = 0; i < num_pixels; i++) { \
+ d[3 * i ] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \
+ d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \
+ d[3 * i + 2] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \
+ } \
+}
+
+DEFINE_RGB64TO48(nobswap, 0)
+DEFINE_RGB64TO48(bswap, 1)
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index f47b04e..5df5dea 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -6,20 +6,20 @@
* Written by Nick Kurshev.
* YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -52,6 +52,12 @@ extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb64tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb64tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb48tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb48tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb64to48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb64to48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size);
void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size);
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
@@ -64,16 +70,15 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size);
void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
-void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size);
void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size);
void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size);
void shuffle_bytes_3012(const uint8_t *src, uint8_t *dst, int src_size);
void shuffle_bytes_3210(const uint8_t *src, uint8_t *dst, int src_size);
-void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
- uint8_t *vdst, int width, int height, int lumStride,
- int chromStride, int srcStride);
+void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+ uint8_t *vdst, int width, int height, int lumStride,
+ int chromStride, int srcStride, int32_t *rgb2yuv);
/**
* Height should be a multiple of 2 and width should be a multiple of 16.
@@ -119,9 +124,10 @@ extern void (*yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uin
* Chrominance data is only taken from every second line, others are ignored.
* FIXME: Write high quality version.
*/
-extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
- int width, int height,
- int lumStride, int chromStride, int srcStride);
+extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+ int width, int height,
+ int lumStride, int chromStride, int srcStride,
+ int32_t *rgb2yuv);
extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
int srcStride, int dstStride);
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 65ea5dd..121f4ef 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -7,20 +7,20 @@
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
* lot of big-endian byte order fixes by Alex Beregszaszi
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -240,27 +240,6 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
}
}
-/*
- * I use less accurate approximation here by simply left-shifting the input
- * value and filling the low order bits with zeroes. This method improves PNG
- * compression but this scheme cannot reproduce white exactly, since it does
- * not generate an all-ones maximum value; the net effect is to darken the
- * image slightly.
- *
- * The better method should be "left bit replication":
- *
- * 4 3 2 1 0
- * ---------
- * 1 1 0 1 1
- *
- * 7 6 5 4 3 2 1 0
- * ----------------
- * 1 1 0 1 1 1 1 0
- * |=======| |===|
- * | leftmost bits repeated to fill open bits
- * |
- * original bits
- */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
int src_size)
{
@@ -270,9 +249,9 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
while (s < end) {
register uint16_t bgr = *s++;
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x7C00) >> 7;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
}
}
@@ -285,9 +264,9 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
while (s < end) {
register uint16_t bgr = *s++;
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0xF800) >> 8;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
}
}
@@ -301,13 +280,13 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr & 0x7C00) >> 7;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
#else
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x7C00) >> 7;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
*d++ = 255;
#endif
}
@@ -323,13 +302,13 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr & 0xF800) >> 8;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
#else
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0xF800) >> 8;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
*d++ = 255;
#endif
}
@@ -376,9 +355,9 @@ static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i += 2) {
uint64_t k = yc[0] + (uc[0] << 8) +
- (yc[1] << 16) + (vc[0] << 24);
+ (yc[1] << 16) + (unsigned)(vc[0] << 24);
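+                     /* the cast keeps a byte shifted into bit 31 from
+                      * sign-extending when widened to 64 bits */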
uint64_t l = yc[2] + (uc[1] << 8) +
- (yc[3] << 16) + (vc[1] << 24);
+ (yc[3] << 16) + (unsigned)(vc[1] << 24);
*ldst++ = k + (l << 32);
yc += 4;
uc += 2;
@@ -440,9 +419,9 @@ static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i += 2) {
uint64_t k = uc[0] + (yc[0] << 8) +
- (vc[0] << 16) + (yc[1] << 24);
+ (vc[0] << 16) + (unsigned)(yc[1] << 24);
uint64_t l = uc[1] + (yc[2] << 8) +
- (vc[1] << 16) + (yc[3] << 24);
+ (vc[1] << 16) + (unsigned)(yc[3] << 24);
*ldst++ = k + (l << 32);
yc += 4;
uc += 2;
@@ -635,10 +614,13 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
* others are ignored in the C version.
* FIXME: Write HQ version.
*/
-void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
uint8_t *vdst, int width, int height, int lumStride,
- int chromStride, int srcStride)
+ int chromStride, int srcStride, int32_t *rgb2yuv)
{
+ int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
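+    /* the coefficients now come from the caller-supplied rgb2yuv table
+     * rather than compile-time constants, so the conversion matrix can
+     * follow the negotiated colorspace */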
int y;
const int chromWidth = width >> 1;
@@ -649,9 +631,9 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
unsigned int g = src[6 * i + 1];
unsigned int r = src[6 * i + 2];
- unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
- unsigned int V = ((RV * r + GV * g + BV * b) >> RGB2YUV_SHIFT) + 128;
- unsigned int U = ((RU * r + GU * g + BU * b) >> RGB2YUV_SHIFT) + 128;
+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
+ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
+ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
udst[i] = U;
vdst[i] = V;
@@ -661,18 +643,21 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
g = src[6 * i + 4];
r = src[6 * i + 5];
- Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
+ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
ydst[2 * i + 1] = Y;
}
ydst += lumStride;
src += srcStride;
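+        /* with an odd height there is no second luma line (nor another
+         * chroma line) to read, so stop before overrunning the source */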
+ if (y+1 == height)
+ break;
+
for (i = 0; i < chromWidth; i++) {
unsigned int b = src[6 * i + 0];
unsigned int g = src[6 * i + 1];
unsigned int r = src[6 * i + 2];
- unsigned int Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
ydst[2 * i] = Y;
@@ -680,7 +665,7 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
g = src[6 * i + 4];
r = src[6 * i + 5];
- Y = ((RY * r + GY * g + BY * b) >> RGB2YUV_SHIFT) + 16;
+ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
ydst[2 * i + 1] = Y;
}
udst += chromStride;
@@ -856,7 +841,7 @@ static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth = -((-width) >> 1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y = 0; y < height; y++) {
extract_even_c(src, ydst, width);
@@ -876,7 +861,7 @@ static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth = -((-width) >> 1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y = 0; y < height; y++) {
extract_even_c(src, ydst, width);
@@ -894,7 +879,7 @@ static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth = -((-width) >> 1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y = 0; y < height; y++) {
extract_even_c(src + 1, ydst, width);
@@ -914,7 +899,7 @@ static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth = -((-width) >> 1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y = 0; y < height; y++) {
extract_even_c(src + 1, ydst, width);
@@ -953,7 +938,7 @@ static av_cold void rgb2rgb_init_c(void)
yuv422ptouyvy = yuv422ptouyvy_c;
yuy2toyv12 = yuy2toyv12_c;
planar2x = planar2x_c;
- rgb24toyv12 = rgb24toyv12_c;
+ ff_rgb24toyv12 = ff_rgb24toyv12_c;
interleaveBytes = interleaveBytes_c;
deinterleaveBytes = deinterleaveBytes_c;
vu9_to_vu12 = vu9_to_vu12_c;
diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c
index 8063519..661ff5b 100644
--- a/libswscale/swscale-test.c
+++ b/libswscale/swscale-test.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -53,7 +53,7 @@
(x) == AV_PIX_FMT_RGB32_1 || \
(x) == AV_PIX_FMT_YUVA420P)
-static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1,
+static uint64_t getSSD(const uint8_t *src1, const uint8_t *src2, int stride1,
int stride2, int w, int h)
{
int x, y;
@@ -92,7 +92,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
static int srcStride[4];
uint8_t *dst[4] = { 0 };
uint8_t *out[4] = { 0 };
- int dstStride[4];
+ int dstStride[4] = {0};
int i;
uint64_t ssdY, ssdU = 0, ssdV = 0, ssdA = 0;
struct SwsContext *dstContext = NULL, *outContext = NULL;
@@ -108,6 +108,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
av_image_fill_linesizes(srcStride, srcFormat, srcW);
for (p = 0; p < 4; p++) {
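+        /* round each stride up to 16 so the aligned SIMD paths are
+         * exercised safely */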
+ srcStride[p] = FFALIGN(srcStride[p], 16);
if (srcStride[p])
src[p] = av_mallocz(srcStride[p] * srcH + 16);
if (srcStride[p] && !src[p]) {
@@ -125,7 +126,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
res = -1;
goto end;
}
- sws_scale(srcContext, ref, refStride, 0, h, src, srcStride);
+ sws_scale(srcContext, (const uint8_t * const*)ref, refStride, 0, h, src, srcStride);
sws_freeContext(srcContext);
cur_srcFormat = srcFormat;
@@ -141,6 +142,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
* allocated with av_malloc). */
/* An extra 16 bytes is being allocated because some scalers may write
* out of bounds. */
+ dstStride[i] = FFALIGN(dstStride[i], 16);
if (dstStride[i])
dst[i] = av_mallocz(dstStride[i] * dstH + 16);
if (dstStride[i] && !dst[i]) {
@@ -166,7 +168,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
flags);
fflush(stdout);
- sws_scale(dstContext, src, srcStride, 0, srcH, dst, dstStride);
+ sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
for (i = 0; i < 4 && dstStride[i]; i++)
crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i],
@@ -179,6 +181,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
ssdA = r->ssdA;
} else {
for (i = 0; i < 4; i++) {
+ refStride[i] = FFALIGN(refStride[i], 16);
if (refStride[i])
out[i] = av_mallocz(refStride[i] * h);
if (refStride[i] && !out[i]) {
@@ -197,7 +200,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
res = -1;
goto end;
}
- sws_scale(outContext, dst, dstStride, 0, dstH, out, refStride);
+ sws_scale(outContext, (const uint8_t * const*)dst, dstStride, 0, dstH, out, refStride);
ssdY = getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
if (hasChroma(srcFormat) && hasChroma(dstFormat)) {
@@ -304,7 +307,7 @@ static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp,
ret = sscanf(buf,
" %12s %dx%d -> %12s %dx%d flags=%d CRC=%x"
- " SSD=%"PRId64 ", %"PRId64 ", %"PRId64 ", %"PRId64 "\n",
+ " SSD=%"SCNd64 ", %"SCNd64 ", %"SCNd64 ", %"SCNd64 "\n",
srcStr, &srcW, &srcH, dstStr, &dstW, &dstH,
&flags, &r.crc, &r.ssdY, &r.ssdU, &r.ssdV, &r.ssdA);
if (ret != 12) {
@@ -315,7 +318,8 @@ static int fileTest(uint8_t *ref[4], int refStride[4], int w, int h, FILE *fp,
srcFormat = av_get_pix_fmt(srcStr);
dstFormat = av_get_pix_fmt(dstStr);
- if (srcFormat == AV_PIX_FMT_NONE || dstFormat == AV_PIX_FMT_NONE) {
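+    /* also reject absurd dimensions so a malformed reference file cannot
+     * trigger enormous allocations */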
+ if (srcFormat == AV_PIX_FMT_NONE || dstFormat == AV_PIX_FMT_NONE ||
+ srcW > 8192U || srcH > 8192U || dstW > 8192U || dstH > 8192U) {
fprintf(stderr, "malformed input file\n");
return -1;
}
@@ -344,7 +348,7 @@ int main(int argc, char **argv)
enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE;
enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE;
uint8_t *rgb_data = av_malloc(W * H * 4);
- uint8_t *rgb_src[4] = { rgb_data, NULL, NULL, NULL };
+ const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL };
int rgb_stride[4] = { 4 * W, 0, 0, 0 };
uint8_t *data = av_malloc(4 * W * H);
uint8_t *src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
@@ -354,34 +358,20 @@ int main(int argc, char **argv)
AVLFG rand;
int res = -1;
int i;
+ FILE *fp = NULL;
if (!rgb_data || !data)
return -1;
- sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H,
- AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
-
- av_lfg_init(&rand, 1);
-
- for (y = 0; y < H; y++)
- for (x = 0; x < W * 4; x++)
- rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
- sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride);
- sws_freeContext(sws);
- av_free(rgb_data);
-
for (i = 1; i < argc; i += 2) {
if (argv[i][0] != '-' || i + 1 == argc)
goto bad_option;
if (!strcmp(argv[i], "-ref")) {
- FILE *fp = fopen(argv[i + 1], "r");
+ fp = fopen(argv[i + 1], "r");
if (!fp) {
fprintf(stderr, "could not open '%s'\n", argv[i + 1]);
goto error;
}
- res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat);
- fclose(fp);
- goto end;
} else if (!strcmp(argv[i], "-src")) {
srcFormat = av_get_pix_fmt(argv[i + 1]);
if (srcFormat == AV_PIX_FMT_NONE) {
@@ -401,9 +391,25 @@ bad_option:
}
}
- selfTest(src, stride, W, H, srcFormat, dstFormat);
-end:
- res = 0;
+ sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H,
+ AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
+
+ av_lfg_init(&rand, 1);
+
+ for (y = 0; y < H; y++)
+ for (x = 0; x < W * 4; x++)
+ rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
+ sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride);
+ sws_freeContext(sws);
+ av_free(rgb_data);
+
+ if(fp) {
+ res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat);
+ fclose(fp);
+ } else {
+ selfTest(src, stride, W, H, srcFormat, dstFormat);
+ res = 0;
+ }
error:
av_free(data);
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 7756e1b..59ead12 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1,29 +1,29 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
+#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
@@ -35,7 +35,7 @@
#include "swscale_internal.h"
#include "swscale.h"
-DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_128)[8][8] = {
+DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_128)[9][8] = {
{ 36, 68, 60, 92, 34, 66, 58, 90, },
{ 100, 4, 124, 28, 98, 2, 122, 26, },
{ 52, 84, 44, 76, 50, 82, 42, 74, },
@@ -44,6 +44,7 @@ DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_128)[8][8] = {
{ 96, 0, 120, 24, 102, 6, 126, 30, },
{ 48, 80, 40, 72, 54, 86, 46, 78, },
{ 112, 16, 104, 8, 118, 22, 110, 14, },
+ { 36, 68, 60, 92, 34, 66, 58, 90, },
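+    /* the first row is repeated as a ninth entry (note the [9][8] bound
+     * above), presumably so callers may index one row past the 8x8
+     * pattern without wrapping */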
};
DECLARE_ALIGNED(8, static const uint8_t, sws_pb_64)[8] = {
@@ -61,28 +62,6 @@ static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
}
}
-static void fill_plane9or10(uint8_t *plane, int stride, int width,
- int height, int y, uint8_t val,
- const int dst_depth, const int big_endian)
-{
- int i, j;
- uint16_t *dst = (uint16_t *) (plane + stride * y);
-#define FILL8TO9_OR_10(wfunc) \
- for (i = 0; i < height; i++) { \
- for (j = 0; j < width; j++) { \
- wfunc(&dst[j], (val << (dst_depth - 8)) | \
- (val >> (16 - dst_depth))); \
- } \
- dst += stride / 2; \
- }
- if (big_endian) {
- FILL8TO9_OR_10(AV_WB16);
- } else {
- FILL8TO9_OR_10(AV_WL16);
- }
-}
-
-
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
const uint8_t *_src, const int16_t *filter,
const int32_t *filterPos, int filterSize)
@@ -94,6 +73,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
int bits = desc->comp[0].depth_minus1;
int sh = bits - 4;
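+    /* RGB and paletted sources are expanded to a fixed intermediate depth
+     * by the input converters, so the shift must not follow the nominal
+     * source depth here (mirrors the change in hScale16To15_c below) */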
+ if((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth_minus1<15)
+ sh= 9;
+
for (i = 0; i < dstW; i++) {
int j;
int srcPos = filterPos[i];
@@ -116,6 +98,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
const uint16_t *src = (const uint16_t *) _src;
int sh = desc->comp[0].depth_minus1;
+ if(sh<15)
+ sh= isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : desc->comp[0].depth_minus1;
+
for (i = 0; i < dstW; i++) {
int j;
int srcPos = filterPos[i];
@@ -223,8 +208,9 @@ static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
int i;
int32_t *dst = (int32_t *) _dst;
- for (i = 0; i < width; i++)
- dst[i] = (FFMIN(dst[i], 30189 << 4) * 4769 - (39057361 << 2)) >> 12;
+ for (i = 0; i < width; i++) {
+ dst[i] = ((int)(FFMIN(dst[i], 30189 << 4) * 4769U - (39057361 << 2))) >> 12;
+ }
}
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
@@ -232,20 +218,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
int i;
int32_t *dst = (int32_t *) _dst;
for (i = 0; i < width; i++)
- dst[i] = (dst[i] * 14071 + (33561947 << 4)) >> 14;
-}
-
-static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
- const uint8_t *src, int srcW, int xInc)
-{
- int i;
- unsigned int xpos = 0;
- for (i = 0; i < dstWidth; i++) {
- register unsigned int xx = xpos >> 16;
- register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
- dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
- xpos += xInc;
- }
+ dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
}
// *** horizontal scale Y line to temp buffer
@@ -258,16 +231,19 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha)
{
- void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) =
+ void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) =
isAlpha ? c->alpToYV12 : c->lumToYV12;
void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
const uint8_t *src = src_in[isAlpha ? 3 : 0];
if (toYV12) {
- toYV12(formatConvBuffer, src, srcW, pal);
+ toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
src = formatConvBuffer;
} else if (c->readLumPlanar && !isAlpha) {
- c->readLumPlanar(formatConvBuffer, src_in, srcW);
+ c->readLumPlanar(formatConvBuffer, src_in, srcW, c->input_rgb2yuv_table);
+ src = formatConvBuffer;
+ } else if (c->readAlpPlanar && isAlpha) {
+ c->readAlpPlanar(formatConvBuffer, src_in, srcW, NULL);
src = formatConvBuffer;
}
@@ -282,21 +258,6 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
convertRange(dst, dstWidth);
}
-static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
- int dstWidth, const uint8_t *src1,
- const uint8_t *src2, int srcW, int xInc)
-{
- int i;
- unsigned int xpos = 0;
- for (i = 0; i < dstWidth; i++) {
- register unsigned int xx = xpos >> 16;
- register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
- dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
- dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
- xpos += xInc;
- }
-}
-
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
int16_t *dst2, int dstWidth,
const uint8_t *src_in[4],
@@ -309,14 +270,14 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
const uint8_t *src1 = src_in[1], *src2 = src_in[2];
if (c->chrToYV12) {
uint8_t *buf2 = formatConvBuffer +
- FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
- c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
- src1 = formatConvBuffer;
- src2 = buf2;
+ FFALIGN(srcW*2+78, 16);
+ c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
+ src1= formatConvBuffer;
+ src2= buf2;
} else if (c->readChrPlanar) {
uint8_t *buf2 = formatConvBuffer +
- FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
- c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
+ FFALIGN(srcW*2+78, 16);
+ c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW, c->input_rgb2yuv_table);
src1 = formatConvBuffer;
src2 = buf2;
}
@@ -356,8 +317,6 @@ static int swscale(SwsContext *c, const uint8_t *src[],
int32_t *vChrFilterPos = c->vChrFilterPos;
int32_t *hLumFilterPos = c->hLumFilterPos;
int32_t *hChrFilterPos = c->hChrFilterPos;
- int16_t *vLumFilter = c->vLumFilter;
- int16_t *vChrFilter = c->vChrFilter;
int16_t *hLumFilter = c->hLumFilter;
int16_t *hChrFilter = c->hChrFilter;
int32_t *lumMmxFilter = c->lumMmxFilter;
@@ -381,8 +340,8 @@ static int swscale(SwsContext *c, const uint8_t *src[],
yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
yuv2packedX_fn yuv2packedX = c->yuv2packedX;
yuv2anyX_fn yuv2anyX = c->yuv2anyX;
- const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
- const int chrSrcSliceH = -((-srcSliceH) >> c->chrSrcVSubSample);
+ const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample;
+ const int chrSrcSliceH = FF_CEIL_RSHIFT(srcSliceH, c->chrSrcVSubSample);
int should_dither = is9_OR_10BPS(c->srcFormat) ||
is16BPS(c->srcFormat);
int lastDstY;
@@ -394,6 +353,10 @@ static int swscale(SwsContext *c, const uint8_t *src[],
int lastInLumBuf = c->lastInLumBuf;
int lastInChrBuf = c->lastInChrBuf;
+ if (!usePal(c->srcFormat)) {
+ pal = c->input_rgb2yuv_table;
+ }
+
if (isPacked(c->srcFormat)) {
src[0] =
src[1] =
@@ -417,8 +380,8 @@ static int swscale(SwsContext *c, const uint8_t *src[],
DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
- if (dstStride[0] % 8 != 0 || dstStride[1] % 8 != 0 ||
- dstStride[2] % 8 != 0 || dstStride[3] % 8 != 0) {
+ if (dstStride[0]&15 || dstStride[1]&15 ||
+ dstStride[2]&15 || dstStride[3]&15) {
static int warnedAlready = 0; // FIXME maybe move this into the context
if (flags & SWS_PRINT_INFO && !warnedAlready) {
av_log(c, AV_LOG_WARNING,
@@ -428,6 +391,19 @@ static int swscale(SwsContext *c, const uint8_t *src[],
}
}
+ if ( (uintptr_t)dst[0]&15 || (uintptr_t)dst[1]&15 || (uintptr_t)dst[2]&15
+ || (uintptr_t)src[0]&15 || (uintptr_t)src[1]&15 || (uintptr_t)src[2]&15
+ || dstStride[0]&15 || dstStride[1]&15 || dstStride[2]&15 || dstStride[3]&15
+ || srcStride[0]&15 || srcStride[1]&15 || srcStride[2]&15 || srcStride[3]&15
+ ) {
+ static int warnedAlready=0;
+ int cpu_flags = av_get_cpu_flags();
+ if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
+            av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speed loss\n");
+ warnedAlready=1;
+ }
+ }
+
/* Note the user might start scaling the picture in the middle so this
* will not get executed. This is not really intended but works
* currently, so people might do it. */
@@ -452,6 +428,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
dst[2] + dstStride[2] * chrDstY,
(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
};
+ int use_mmx_vfilter= c->use_mmx_vfilter;
// First line needed as input
const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
@@ -470,8 +447,8 @@ static int swscale(SwsContext *c, const uint8_t *src[],
lastInLumBuf = firstLumSrcY - 1;
if (firstChrSrcY > lastInChrBuf)
lastInChrBuf = firstChrSrcY - 1;
- assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
- assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
+ av_assert0(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
+ av_assert0(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
DEBUG_BUFFERS("dstY: %d\n", dstY);
DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
@@ -481,7 +458,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
// Do we have enough lines in this slice to output the dstY line
enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
- lastChrSrcY < -((-srcSliceY - srcSliceH) >> c->chrSrcVSubSample);
+ lastChrSrcY < FF_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample);
if (!enough_lines) {
lastLumSrcY = srcSliceY + srcSliceH - 1;
@@ -499,9 +476,9 @@ static int swscale(SwsContext *c, const uint8_t *src[],
src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
};
lumBufIndex++;
- assert(lumBufIndex < 2 * vLumBufSize);
- assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
- assert(lastInLumBuf + 1 - srcSliceY >= 0);
+ av_assert0(lumBufIndex < 2 * vLumBufSize);
+ av_assert0(lastInLumBuf + 1 - srcSliceY < srcSliceH);
+ av_assert0(lastInLumBuf + 1 - srcSliceY >= 0);
hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
hLumFilter, hLumFilterPos, hLumFilterSize,
formatConvBuffer, pal, 0);
@@ -521,9 +498,9 @@ static int swscale(SwsContext *c, const uint8_t *src[],
src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
};
chrBufIndex++;
- assert(chrBufIndex < 2 * vChrBufSize);
- assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
- assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
+ av_assert0(chrBufIndex < 2 * vChrBufSize);
+ av_assert0(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
+ av_assert0(lastInChrBuf + 1 - chrSrcSliceY >= 0);
// FIXME replace parameters through context struct (some at least)
if (c->needs_hcscale)
@@ -556,103 +533,81 @@ static int swscale(SwsContext *c, const uint8_t *src[],
* this array's tail */
ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
&yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX);
+ use_mmx_vfilter= 0;
}
{
- const int16_t **lumSrcPtr = (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
- const int16_t **chrUSrcPtr = (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- const int16_t **chrVSrcPtr = (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+ const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
- (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-
- if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
- const int16_t **tmpY = (const int16_t **)lumPixBuf +
- 2 * vLumBufSize;
- int neg = -firstLumSrcY, i;
- int end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
- for (i = 0; i < neg; i++)
- tmpY[i] = lumSrcPtr[neg];
- for (; i < end; i++)
- tmpY[i] = lumSrcPtr[i];
- for (; i < vLumFilterSize; i++)
- tmpY[i] = tmpY[i - 1];
- lumSrcPtr = tmpY;
-
- if (alpSrcPtr) {
- const int16_t **tmpA = (const int16_t **)alpPixBuf +
- 2 * vLumBufSize;
- for (i = 0; i < neg; i++)
- tmpA[i] = alpSrcPtr[neg];
- for (; i < end; i++)
- tmpA[i] = alpSrcPtr[i];
- for (; i < vLumFilterSize; i++)
- tmpA[i] = tmpA[i - 1];
- alpSrcPtr = tmpA;
- }
- }
- if (firstChrSrcY < 0 ||
- firstChrSrcY + vChrFilterSize > c->chrSrcH) {
- const int16_t **tmpU = (const int16_t **)chrUPixBuf + 2 * vChrBufSize,
- **tmpV = (const int16_t **)chrVPixBuf + 2 * vChrBufSize;
- int neg = -firstChrSrcY, i;
- int end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
- for (i = 0; i < neg; i++) {
- tmpU[i] = chrUSrcPtr[neg];
- tmpV[i] = chrVSrcPtr[neg];
- }
- for (; i < end; i++) {
- tmpU[i] = chrUSrcPtr[i];
- tmpV[i] = chrVSrcPtr[i];
- }
- for (; i < vChrFilterSize; i++) {
- tmpU[i] = tmpU[i - 1];
- tmpV[i] = tmpV[i - 1];
- }
- chrUSrcPtr = tmpU;
- chrVSrcPtr = tmpV;
- }
+ (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+ int16_t *vLumFilter = c->vLumFilter;
+ int16_t *vChrFilter = c->vChrFilter;
if (isPlanarYUV(dstFormat) ||
(isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like
const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;
+ vLumFilter += dstY * vLumFilterSize;
+ vChrFilter += chrDstY * vChrFilterSize;
+
+// av_assert0(use_mmx_vfilter != (
+// yuv2planeX == yuv2planeX_10BE_c
+// || yuv2planeX == yuv2planeX_10LE_c
+// || yuv2planeX == yuv2planeX_9BE_c
+// || yuv2planeX == yuv2planeX_9LE_c
+// || yuv2planeX == yuv2planeX_16BE_c
+// || yuv2planeX == yuv2planeX_16LE_c
+// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
+
+ if(use_mmx_vfilter){
+ vLumFilter= (int16_t *)c->lumMmxFilter;
+ vChrFilter= (int16_t *)c->chrMmxFilter;
+ }
+
if (vLumFilterSize == 1) {
yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
} else {
- yuv2planeX(vLumFilter + dstY * vLumFilterSize,
- vLumFilterSize, lumSrcPtr, dest[0],
+ yuv2planeX(vLumFilter, vLumFilterSize,
+ lumSrcPtr, dest[0],
dstW, c->lumDither8, 0);
}
if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
if (yuv2nv12cX) {
- yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize,
+ yuv2nv12cX(c, vChrFilter,
vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
dest[1], chrDstW);
} else if (vChrFilterSize == 1) {
yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
} else {
- yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
+ yuv2planeX(vChrFilter,
vChrFilterSize, chrUSrcPtr, dest[1],
chrDstW, c->chrDither8, 0);
- yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
+ yuv2planeX(vChrFilter,
vChrFilterSize, chrVSrcPtr, dest[2],
- chrDstW, c->chrDither8, 3);
+ chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
}
}
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+ if(use_mmx_vfilter){
+ vLumFilter= (int16_t *)c->alpMmxFilter;
+ }
if (vLumFilterSize == 1) {
yuv2plane1(alpSrcPtr[0], dest[3], dstW,
c->lumDither8, 0);
} else {
- yuv2planeX(vLumFilter + dstY * vLumFilterSize,
+ yuv2planeX(vLumFilter,
vLumFilterSize, alpSrcPtr, dest[3],
dstW, c->lumDither8, 0);
}
}
} else if (yuv2packedX) {
+ av_assert1(lumSrcPtr + vLumFilterSize - 1 < (const int16_t **)lumPixBuf + vLumBufSize * 2);
+ av_assert1(chrUSrcPtr + vChrFilterSize - 1 < (const int16_t **)chrUPixBuf + vChrBufSize * 2);
if (c->yuv2packed1 && vLumFilterSize == 1 &&
vChrFilterSize <= 2) { // unscaled RGB
int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
@@ -678,6 +633,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
alpSrcPtr, dest[0], dstW, dstY);
}
} else {
+ av_assert1(!yuv2packed1 && !yuv2packed2);
yuv2anyX(c, vLumFilter + dstY * vLumFilterSize,
lumSrcPtr, vLumFilterSize,
vChrFilter + dstY * vChrFilterSize,
@@ -686,18 +642,15 @@ static int swscale(SwsContext *c, const uint8_t *src[],
}
}
}
-
if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) {
int length = dstW;
int height = dstY - lastDstY;
- if (is16BPS(c->dstFormat))
- length *= 2;
- if (is9_OR_10BPS(dstFormat)) {
+ if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
- fill_plane9or10(dst[3], dstStride[3], length, height, lastDstY,
- 255, desc->comp[3].depth_minus1 + 1,
- isBE(dstFormat));
+ fillPlane16(dst[3], dstStride[3], length, height, lastDstY,
+ 1, desc->comp[3].depth_minus1,
+ isBE(dstFormat));
} else
fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
}
@@ -718,6 +671,31 @@ static int swscale(SwsContext *c, const uint8_t *src[],
return dstY - lastDstY;
}
+av_cold void ff_sws_init_range_convert(SwsContext *c)
+{
+ c->lumConvertRange = NULL;
+ c->chrConvertRange = NULL;
+ if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
+ if (c->dstBpc <= 14) {
+ if (c->srcRange) {
+ c->lumConvertRange = lumRangeFromJpeg_c;
+ c->chrConvertRange = chrRangeFromJpeg_c;
+ } else {
+ c->lumConvertRange = lumRangeToJpeg_c;
+ c->chrConvertRange = chrRangeToJpeg_c;
+ }
+ } else {
+ if (c->srcRange) {
+ c->lumConvertRange = lumRangeFromJpeg16_c;
+ c->chrConvertRange = chrRangeFromJpeg16_c;
+ } else {
+ c->lumConvertRange = lumRangeToJpeg16_c;
+ c->chrConvertRange = chrRangeToJpeg16_c;
+ }
+ }
+ }
+}
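
The functions selected here implement the standard limited/full range remapping on swscale's fixed-point intermediates. A minimal integer sketch of the 8-bit MPEG-to-JPEG luma expansion (illustrative only; the real lumRangeToJpeg_c operates on the 15-bit intermediate samples):

    /* expand limited-range luma (16..235) to full range (0..255) */
    static inline int luma_to_jpeg8(int y)
    {
        int v = (y - 16) * 255 / 219;
        return v < 0 ? 0 : v > 255 ? 255 : v; /* clip out-of-range input */
    }
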
+
static av_cold void sws_init_swscale(SwsContext *c)
{
enum AVPixelFormat srcFormat = c->srcFormat;
@@ -728,40 +706,23 @@ static av_cold void sws_init_swscale(SwsContext *c)
ff_sws_init_input_funcs(c);
+
if (c->srcBpc == 8) {
- if (c->dstBpc <= 10) {
+ if (c->dstBpc <= 14) {
c->hyScale = c->hcScale = hScale8To15_c;
if (c->flags & SWS_FAST_BILINEAR) {
- c->hyscale_fast = hyscale_fast_c;
- c->hcscale_fast = hcscale_fast_c;
+ c->hyscale_fast = ff_hyscale_fast_c;
+ c->hcscale_fast = ff_hcscale_fast_c;
}
} else {
c->hyScale = c->hcScale = hScale8To19_c;
}
} else {
- c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
+ c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c
: hScale16To15_c;
}
- if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
- if (c->dstBpc <= 10) {
- if (c->srcRange) {
- c->lumConvertRange = lumRangeFromJpeg_c;
- c->chrConvertRange = chrRangeFromJpeg_c;
- } else {
- c->lumConvertRange = lumRangeToJpeg_c;
- c->chrConvertRange = chrRangeToJpeg_c;
- }
- } else {
- if (c->srcRange) {
- c->lumConvertRange = lumRangeFromJpeg16_c;
- c->chrConvertRange = chrRangeFromJpeg16_c;
- } else {
- c->lumConvertRange = lumRangeToJpeg16_c;
- c->chrConvertRange = chrRangeToJpeg16_c;
- }
- }
- }
+ ff_sws_init_range_convert(c);
if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
srcFormat == AV_PIX_FMT_MONOBLACK || srcFormat == AV_PIX_FMT_MONOWHITE))
@@ -779,3 +740,349 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
return swscale;
}
+
+static void reset_ptr(const uint8_t *src[], int format)
+{
+ if (!isALPHA(format))
+ src[3] = NULL;
+ if (!isPlanar(format)) {
+ src[3] = src[2] = NULL;
+
+ if (!usePal(format))
+ src[1] = NULL;
+ }
+}
+
+static int check_image_pointers(const uint8_t * const data[4], enum AVPixelFormat pix_fmt,
+ const int linesizes[4])
+{
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ int plane = desc->comp[i].plane;
+ if (!data[plane] || !linesizes[plane])
+ return 0;
+ }
+
+ return 1;
+}
+
+static void xyz12Torgb48(struct SwsContext *c, uint16_t *dst,
+ const uint16_t *src, int stride, int h)
+{
+ int xp,yp;
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
+
+ for (yp=0; yp<h; yp++) {
+ for (xp=0; xp+2<stride; xp+=3) {
+ int x, y, z, r, g, b;
+
+ if (desc->flags & AV_PIX_FMT_FLAG_BE) {
+ x = AV_RB16(src + xp + 0);
+ y = AV_RB16(src + xp + 1);
+ z = AV_RB16(src + xp + 2);
+ } else {
+ x = AV_RL16(src + xp + 0);
+ y = AV_RL16(src + xp + 1);
+ z = AV_RL16(src + xp + 2);
+ }
+
+ x = c->xyzgamma[x>>4];
+ y = c->xyzgamma[y>>4];
+ z = c->xyzgamma[z>>4];
+
+ // convert from XYZlinear to sRGBlinear
+ r = c->xyz2rgb_matrix[0][0] * x +
+ c->xyz2rgb_matrix[0][1] * y +
+ c->xyz2rgb_matrix[0][2] * z >> 12;
+ g = c->xyz2rgb_matrix[1][0] * x +
+ c->xyz2rgb_matrix[1][1] * y +
+ c->xyz2rgb_matrix[1][2] * z >> 12;
+ b = c->xyz2rgb_matrix[2][0] * x +
+ c->xyz2rgb_matrix[2][1] * y +
+ c->xyz2rgb_matrix[2][2] * z >> 12;
+
+ // limit values to 12-bit depth
+ r = av_clip_c(r,0,4095);
+ g = av_clip_c(g,0,4095);
+ b = av_clip_c(b,0,4095);
+
+ // convert from sRGBlinear to RGB and scale from 12bit to 16bit
+ if (desc->flags & AV_PIX_FMT_FLAG_BE) {
+ AV_WB16(dst + xp + 0, c->rgbgamma[r] << 4);
+ AV_WB16(dst + xp + 1, c->rgbgamma[g] << 4);
+ AV_WB16(dst + xp + 2, c->rgbgamma[b] << 4);
+ } else {
+ AV_WL16(dst + xp + 0, c->rgbgamma[r] << 4);
+ AV_WL16(dst + xp + 1, c->rgbgamma[g] << 4);
+ AV_WL16(dst + xp + 2, c->rgbgamma[b] << 4);
+ }
+ }
+ src += stride;
+ dst += stride;
+ }
+}
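
A standalone sanity check of the Q12 fixed-point multiply used above; the identity matrix here is illustrative, not the actual XYZ-to-sRGB coefficients swscale derives:

    #include <stdint.h>
    #include <stdio.h>

    static const int16_t m[3][3] = {   /* 1.0 in Q12 is 4096 */
        { 4096,    0,    0 },
        {    0, 4096,    0 },
        {    0,    0, 4096 },
    };

    int main(void)
    {
        int x = 1000, y = 2000, z = 3000;  /* 12-bit linear samples */
        int r = (m[0][0] * x + m[0][1] * y + m[0][2] * z) >> 12;
        printf("r = %d\n", r);             /* 1000: identity leaves x unchanged */
        return 0;
    }
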
+
+static void rgb48Toxyz12(struct SwsContext *c, uint16_t *dst,
+ const uint16_t *src, int stride, int h)
+{
+ int xp,yp;
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
+
+ for (yp=0; yp<h; yp++) {
+ for (xp=0; xp+2<stride; xp+=3) {
+ int x, y, z, r, g, b;
+
+ if (desc->flags & AV_PIX_FMT_FLAG_BE) {
+ r = AV_RB16(src + xp + 0);
+ g = AV_RB16(src + xp + 1);
+ b = AV_RB16(src + xp + 2);
+ } else {
+ r = AV_RL16(src + xp + 0);
+ g = AV_RL16(src + xp + 1);
+ b = AV_RL16(src + xp + 2);
+ }
+
+ r = c->rgbgammainv[r>>4];
+ g = c->rgbgammainv[g>>4];
+ b = c->rgbgammainv[b>>4];
+
+ // convert from sRGBlinear to XYZlinear
+ x = c->rgb2xyz_matrix[0][0] * r +
+ c->rgb2xyz_matrix[0][1] * g +
+ c->rgb2xyz_matrix[0][2] * b >> 12;
+ y = c->rgb2xyz_matrix[1][0] * r +
+ c->rgb2xyz_matrix[1][1] * g +
+ c->rgb2xyz_matrix[1][2] * b >> 12;
+ z = c->rgb2xyz_matrix[2][0] * r +
+ c->rgb2xyz_matrix[2][1] * g +
+ c->rgb2xyz_matrix[2][2] * b >> 12;
+
+ // limit values to 12-bit depth
+ x = av_clip_c(x,0,4095);
+ y = av_clip_c(y,0,4095);
+ z = av_clip_c(z,0,4095);
+
+ // convert from XYZlinear to X'Y'Z' and scale from 12bit to 16bit
+ if (desc->flags & AV_PIX_FMT_FLAG_BE) {
+ AV_WB16(dst + xp + 0, c->xyzgammainv[x] << 4);
+ AV_WB16(dst + xp + 1, c->xyzgammainv[y] << 4);
+ AV_WB16(dst + xp + 2, c->xyzgammainv[z] << 4);
+ } else {
+ AV_WL16(dst + xp + 0, c->xyzgammainv[x] << 4);
+ AV_WL16(dst + xp + 1, c->xyzgammainv[y] << 4);
+ AV_WL16(dst + xp + 2, c->xyzgammainv[z] << 4);
+ }
+ }
+ src += stride;
+ dst += stride;
+ }
+}
+
+/**
+ * swscale wrapper, so we don't need to export the SwsContext.
+ * Assumes planar YUV to be in YUV order instead of YVU.
+ */
+int attribute_align_arg sws_scale(struct SwsContext *c,
+ const uint8_t * const srcSlice[],
+ const int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t *const dst[],
+ const int dstStride[])
+{
+ int i, ret;
+ const uint8_t *src2[4];
+ uint8_t *dst2[4];
+ uint8_t *rgb0_tmp = NULL;
+
+ if (!srcStride || !dstStride || !dst || !srcSlice) {
+        av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL; please check the calling code\n");
+ return 0;
+ }
+ memcpy(src2, srcSlice, sizeof(src2));
+ memcpy(dst2, dst, sizeof(dst2));
+
+ // do not mess up sliceDir if we have a "trailing" 0-size slice
+ if (srcSliceH == 0)
+ return 0;
+
+ if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) {
+ av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
+ return 0;
+ }
+ if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) {
+ av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
+ return 0;
+ }
+
+ if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
+ av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
+ return 0;
+ }
+ if (c->sliceDir == 0) {
+ if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
+ }
+
+ if (usePal(c->srcFormat)) {
+ for (i = 0; i < 256; i++) {
+ int r, g, b, y, u, v, a = 0xff;
+ if (c->srcFormat == AV_PIX_FMT_PAL8) {
+ uint32_t p = ((const uint32_t *)(srcSlice[1]))[i];
+ a = (p >> 24) & 0xFF;
+ r = (p >> 16) & 0xFF;
+ g = (p >> 8) & 0xFF;
+ b = p & 0xFF;
+ } else if (c->srcFormat == AV_PIX_FMT_RGB8) {
+ r = ( i >> 5 ) * 36;
+ g = ((i >> 2) & 7) * 36;
+ b = ( i & 3) * 85;
+ } else if (c->srcFormat == AV_PIX_FMT_BGR8) {
+ b = ( i >> 6 ) * 85;
+ g = ((i >> 3) & 7) * 36;
+ r = ( i & 7) * 36;
+ } else if (c->srcFormat == AV_PIX_FMT_RGB4_BYTE) {
+ r = ( i >> 3 ) * 255;
+ g = ((i >> 1) & 3) * 85;
+ b = ( i & 1) * 255;
+ } else if (c->srcFormat == AV_PIX_FMT_GRAY8 || c->srcFormat == AV_PIX_FMT_GRAY8A) {
+ r = g = b = i;
+ } else {
+ av_assert1(c->srcFormat == AV_PIX_FMT_BGR4_BYTE);
+ b = ( i >> 3 ) * 255;
+ g = ((i >> 1) & 3) * 85;
+ r = ( i & 1) * 255;
+ }
+#define RGB2YUV_SHIFT 15
+#define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define BU ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define GY ( (int) (0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define GV (-(int) (0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define GU (-(int) (0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define RY ( (int) (0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+
+ y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
+ u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
+ v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
+ c->pal_yuv[i]= y + (u<<8) + (v<<16) + ((unsigned)a<<24);
+
+ switch (c->dstFormat) {
+ case AV_PIX_FMT_BGR32:
+#if !HAVE_BIGENDIAN
+ case AV_PIX_FMT_RGB24:
+#endif
+ c->pal_rgb[i]= r + (g<<8) + (b<<16) + ((unsigned)a<<24);
+ break;
+ case AV_PIX_FMT_BGR32_1:
+#if HAVE_BIGENDIAN
+ case AV_PIX_FMT_BGR24:
+#endif
+ c->pal_rgb[i]= a + (r<<8) + (g<<16) + ((unsigned)b<<24);
+ break;
+ case AV_PIX_FMT_RGB32_1:
+#if HAVE_BIGENDIAN
+ case AV_PIX_FMT_RGB24:
+#endif
+ c->pal_rgb[i]= a + (b<<8) + (g<<16) + ((unsigned)r<<24);
+ break;
+ case AV_PIX_FMT_RGB32:
+#if !HAVE_BIGENDIAN
+ case AV_PIX_FMT_BGR24:
+#endif
+ default:
+ c->pal_rgb[i]= b + (g<<8) + (r<<16) + ((unsigned)a<<24);
+ }
+ }
+ }
+
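A standalone sanity check of the BT.601 limited-range constants defined above (a minimal sketch, compiled separately from this file): white input, r = g = b = 255, must map to Y = 235, the limited-range maximum.

    #include <stdio.h>

    #define RGB2YUV_SHIFT 15
    #define BY ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
    #define GY ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
    #define RY ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))

    int main(void)
    {
        int r = 255, g = 255, b = 255;
        int y = (RY * r + GY * g + BY * b +
                 (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
        printf("Y = %d\n", y); /* prints 235 */
        return 0;
    }
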
+ if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) {
+ uint8_t *base;
+ int x,y;
+ rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
+ if (!rgb0_tmp)
+ return AVERROR(ENOMEM);
+
+ base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;
+ for (y=0; y<srcSliceH; y++){
+ memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW);
+ for (x=c->src0Alpha-1; x<4*c->srcW; x+=4) {
+ base[ srcStride[0]*y + x] = 0xFF;
+ }
+ }
+ src2[0] = base;
+ }
+
+ if (c->srcXYZ && !(c->dstXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
+ uint8_t *base;
+ rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
+ if (!rgb0_tmp)
+ return AVERROR(ENOMEM);
+
+ base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;
+
+ xyz12Torgb48(c, (uint16_t*)base, (const uint16_t*)src2[0], srcStride[0]/2, srcSliceH);
+ src2[0] = base;
+ }
+
+ if (!srcSliceY && (c->flags & SWS_BITEXACT) && c->dither == SWS_DITHER_ED && c->dither_error[0])
+ for (i = 0; i < 4; i++)
+ memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (c->dstW+2));
+
+
+ // copy strides, so they can safely be modified
+ if (c->sliceDir == 1) {
+ // slices go from top to bottom
+ int srcStride2[4] = { srcStride[0], srcStride[1], srcStride[2],
+ srcStride[3] };
+ int dstStride2[4] = { dstStride[0], dstStride[1], dstStride[2],
+ dstStride[3] };
+
+ reset_ptr(src2, c->srcFormat);
+ reset_ptr((void*)dst2, c->dstFormat);
+
+ /* reset slice direction at end of frame */
+ if (srcSliceY + srcSliceH == c->srcH)
+ c->sliceDir = 0;
+
+ ret = c->swscale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2,
+ dstStride2);
+ } else {
+ // slices go from bottom to top => we flip the image internally
+ int srcStride2[4] = { -srcStride[0], -srcStride[1], -srcStride[2],
+ -srcStride[3] };
+ int dstStride2[4] = { -dstStride[0], -dstStride[1], -dstStride[2],
+ -dstStride[3] };
+
+ src2[0] += (srcSliceH - 1) * srcStride[0];
+ if (!usePal(c->srcFormat))
+ src2[1] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[1];
+ src2[2] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[2];
+ src2[3] += (srcSliceH - 1) * srcStride[3];
+ dst2[0] += ( c->dstH - 1) * dstStride[0];
+ dst2[1] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[1];
+ dst2[2] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[2];
+ dst2[3] += ( c->dstH - 1) * dstStride[3];
+
+ reset_ptr(src2, c->srcFormat);
+ reset_ptr((void*)dst2, c->dstFormat);
+
+ /* reset slice direction at end of frame */
+ if (!srcSliceY)
+ c->sliceDir = 0;
+
+ ret = c->swscale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH,
+ srcSliceH, dst2, dstStride2);
+ }
+
+
+ if (c->dstXYZ && !(c->srcXYZ && c->srcW==c->dstW && c->srcH==c->dstH)) {
+        /* convert the RGB48 output to XYZ12 in place */
+ rgb48Toxyz12(c, (uint16_t*)dst2[0], (const uint16_t*)dst2[0], dstStride[0]/2, ret);
+ }
+
+ av_free(rgb0_tmp);
+ return ret;
+}
+
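
Typical whole-frame usage of the wrapper above from caller code, a sketch with abbreviated error handling (buffer allocation, e.g. via av_image_alloc(), is assumed to happen elsewhere):

    #include <libswscale/swscale.h>

    static int convert_frame(const uint8_t *const src[4], const int src_linesize[4],
                             int w, int h,
                             uint8_t *const dst[4], const int dst_linesize[4])
    {
        int out_h;
        struct SwsContext *sws = sws_getContext(w, h, AV_PIX_FMT_YUV420P,
                                                w, h, AV_PIX_FMT_RGB24,
                                                SWS_BILINEAR, NULL, NULL, NULL);
        if (!sws)
            return -1;
        /* a single slice covering the whole frame; returns the output height */
        out_h = sws_scale(sws, src, src_linesize, 0, h, dst, dst_linesize);
        sws_freeContext(sws);
        return out_h == h ? 0 : -1;
    }
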
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 8abbac4..903e120 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -24,8 +24,7 @@
/**
* @file
* @ingroup libsws
- * @brief
- * external api for the swscale stuff
+ * external API header
*/
#include <stdint.h>
@@ -81,6 +80,7 @@ const char *swscale_license(void);
#define SWS_DIRECT_BGR 0x8000
#define SWS_ACCURATE_RND 0x40000
#define SWS_BITEXACT 0x80000
+#define SWS_ERROR_DIFFUSION 0x800000
#if FF_API_SWS_CPU_CAPS
/**
@@ -225,7 +225,13 @@ int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
uint8_t *const dst[], const int dstStride[]);
/**
- * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
+ * @param dstRange flag indicating the white-black range of the output (1=jpeg / 0=mpeg)
+ * @param srcRange flag indicating the white-black range of the input (1=jpeg / 0=mpeg)
+ * @param table the yuv2rgb coefficients describing the output yuv space, normally ff_yuv2rgb_coeffs[x]
+ * @param inv_table the yuv2rgb coefficients describing the input yuv space, normally ff_yuv2rgb_coeffs[x]
+ * @param brightness 16.16 fixed point brightness correction
+ * @param contrast 16.16 fixed point contrast correction
+ * @param saturation 16.16 fixed point saturation correction
* @return -1 if not supported
*/
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
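
A usage sketch for the parameters documented above, forcing full-range (JPEG) output while keeping BT.601 matrices on both sides:

    #include <libswscale/swscale.h>

    static int set_full_range_output(struct SwsContext *sws)
    {
        const int *coefs = sws_getCoefficients(SWS_CS_ITU601);
        /* inv_table/srcRange describe the input, table/dstRange the output;
         * brightness = 0, contrast = saturation = 1.0 in 16.16 fixed point */
        return sws_setColorspaceDetails(sws, coefs, 0, coefs, 1,
                                        0, 1 << 16, 1 << 16);
    }
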
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index a0daa07..335e1f8 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -27,18 +27,23 @@
#include <altivec.h>
#endif
+#include "version.h"
+
#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavutil/pixfmt.h"
#include "libavutil/pixdesc.h"
#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
-#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients
-#define MAX_FILTER_SIZE 256
+#define YUVRGB_TABLE_HEADROOM 128
+
+#define MAX_FILTER_SIZE SWS_MAX_FILTER_SIZE
+#define DITHER1XBPP
#if HAVE_BIGENDIAN
#define ALT32_CORR (-1)
@@ -58,6 +63,16 @@
struct SwsContext;
+typedef enum SwsDither {
+ SWS_DITHER_NONE = 0,
+ SWS_DITHER_AUTO,
+ SWS_DITHER_BAYER,
+ SWS_DITHER_ED,
+ SWS_DITHER_A_DITHER,
+ SWS_DITHER_X_DITHER,
+ NB_SWS_DITHER,
+} SwsDither;
+
typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[],
int srcStride[], int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[]);
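
The dither field holding the SwsDither enum above is exposed as an AVOption; a sketch of selecting error diffusion from user code (the option name "sws_dither" comes from the options.c part of this patch set and is treated as an assumption here):

    #include <libavutil/opt.h>
    #include <libswscale/swscale.h>

    static struct SwsContext *alloc_ed_scaler(void)
    {
        struct SwsContext *sws = sws_alloc_context();
        if (sws)
            av_opt_set(sws, "sws_dither", "ed", 0); /* SWS_DITHER_ED */
        return sws;
    }
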
@@ -351,10 +366,25 @@ typedef struct SwsContext {
int dstY; ///< Last destination vertical line output from last slice.
int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
void *yuvTable; // pointer to the yuv->rgb table start so it can be freed()
- uint8_t *table_rV[256];
- uint8_t *table_gU[256];
- int table_gV[256];
- uint8_t *table_bU[256];
+ // alignment ensures the offset can be added in a single
+ // instruction on e.g. ARM
+ DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
+    DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C- and SIMD-formatted values; the C values are always at the XY_IDX points
+#define RY_IDX 0
+#define GY_IDX 1
+#define BY_IDX 2
+#define RU_IDX 3
+#define GU_IDX 4
+#define BU_IDX 5
+#define RV_IDX 6
+#define GV_IDX 7
+#define BV_IDX 8
+#define RGB2YUV_SHIFT 15
+
+ int *dither_error[4];
//Colorspace stuff
int contrast, brightness, saturation; // for sws_getColorspaceDetails
@@ -362,6 +392,14 @@ typedef struct SwsContext {
int dstColorspaceTable[4];
int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image).
int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
+ int src0Alpha;
+ int dst0Alpha;
+ int srcXYZ;
+ int dstXYZ;
+ int src_h_chr_pos;
+ int dst_h_chr_pos;
+ int src_v_chr_pos;
+ int dst_v_chr_pos;
int yuv2rgb_y_offset;
int yuv2rgb_y_coeff;
int yuv2rgb_v2r_coeff;
@@ -381,18 +419,19 @@ typedef struct SwsContext {
#define U_OFFSET "9*8"
#define V_OFFSET "10*8"
#define LUM_MMX_FILTER_OFFSET "11*8"
-#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
-#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
-#define ESP_OFFSET "11*8+4*4*256*2+8"
-#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
-#define U_TEMP "11*8+4*4*256*2+24"
-#define V_TEMP "11*8+4*4*256*2+32"
-#define Y_TEMP "11*8+4*4*256*2+40"
-#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
-#define UV_OFF_PX "11*8+4*4*256*3+48"
-#define UV_OFF_BYTE "11*8+4*4*256*3+56"
-#define DITHER16 "11*8+4*4*256*3+64"
-#define DITHER32 "11*8+4*4*256*3+80"
+#define CHR_MMX_FILTER_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)
+#define DSTW_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2"
+#define ESP_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+8"
+#define VROUNDER_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+16"
+#define U_TEMP "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+24"
+#define V_TEMP "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+32"
+#define Y_TEMP "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+40"
+#define ALP_MMX_FILTER_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+48"
+#define UV_OFF_PX "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+48"
+#define UV_OFF_BYTE "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+56"
+#define DITHER16 "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+64"
+#define DITHER32 "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+80"
+#define DITHER32_INT (11*8+4*4*MAX_FILTER_SIZE*3+80) // same value as DITHER32 above; used to check that the struct layout has not changed by mistake
DECLARE_ALIGNED(8, uint64_t, redDither);
DECLARE_ALIGNED(8, uint64_t, greenDither);
@@ -418,8 +457,8 @@ typedef struct SwsContext {
// alignment of these values is not necessary, but merely here
    // to maintain the same offset across x86-32 and x86-64. Once we
// use proper offset macros in the asm, they can be removed.
- DECLARE_ALIGNED(8, ptrdiff_t, uv_off_px); ///< offset (in pixels) between u and v planes
- DECLARE_ALIGNED(8, ptrdiff_t, uv_off_byte); ///< offset (in bytes) between u and v planes
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
DECLARE_ALIGNED(8, uint16_t, dither16)[8];
DECLARE_ALIGNED(8, uint32_t, dither32)[8];
@@ -436,6 +475,18 @@ typedef struct SwsContext {
vector signed short *vYCoeffsBank, *vCCoeffsBank;
#endif
+ int use_mmx_vfilter;
+
+/* predefined color-space gamma values */
+#define XYZ_GAMMA (2.6f)
+#define RGB_GAMMA (2.2f)
+ int16_t *xyzgamma;
+ int16_t *rgbgamma;
+ int16_t *xyzgammainv;
+ int16_t *rgbgammainv;
+ int16_t xyz2rgb_matrix[3][4];
+ int16_t rgb2xyz_matrix[3][4];
+
/* function pointers for swscale() */
yuv2planar1_fn yuv2plane1;
yuv2planarX_fn yuv2planeX;
@@ -446,24 +497,25 @@ typedef struct SwsContext {
yuv2anyX_fn yuv2anyX;
/// Unscaled conversion of luma plane to YV12 for horizontal scaler.
- void (*lumToYV12)(uint8_t *dst, const uint8_t *src,
+ void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
int width, uint32_t *pal);
/// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
- void (*alpToYV12)(uint8_t *dst, const uint8_t *src,
+ void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
int width, uint32_t *pal);
/// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
+ const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
int width, uint32_t *pal);
/**
* Functions to read planar input, such as planar RGB, and convert
- * internally to Y/UV.
+ * internally to Y/UV/A.
*/
/** @{ */
- void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width);
+ void (*readLumPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv);
void (*readChrPlanar)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4],
- int width);
+ int width, int32_t *rgb2yuv);
+ void (*readAlpPlanar)(uint8_t *dst, const uint8_t *src[4], int width, int32_t *rgb2yuv);
/** @} */
/**
@@ -539,6 +591,8 @@ typedef struct SwsContext {
void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width);
int needs_hcscale; ///< Set if there are chroma planes to be converted.
+
+ SwsDither dither;
} SwsContext;
//FIXME check init (where 0)
@@ -552,10 +606,18 @@ void ff_yuv2rgb_init_tables_ppc(SwsContext *c, const int inv_table[4],
void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
int lastInLumBuf, int lastInChrBuf);
+av_cold void ff_sws_init_range_convert(SwsContext *c);
+
SwsFunc ff_yuv2rgb_init_x86(SwsContext *c);
SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c);
+#if FF_API_SWS_FORMAT_NAME
+/**
+ * @deprecated Use av_get_pix_fmt_name() instead.
+ */
+attribute_deprecated
const char *sws_format_name(enum AVPixelFormat format);
+#endif
static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
{
@@ -568,9 +630,11 @@ static av_always_inline int is9_OR_10BPS(enum AVPixelFormat pix_fmt)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
av_assert0(desc);
- return desc->comp[0].depth_minus1 == 8 || desc->comp[0].depth_minus1 == 9;
+ return desc->comp[0].depth_minus1 >= 8 && desc->comp[0].depth_minus1 <= 13;
}
+#define isNBPS(x) is9_OR_10BPS(x)
+
static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -601,8 +665,8 @@ static av_always_inline int isRGB(enum AVPixelFormat pix_fmt)
#if 0 // FIXME
#define isGray(x) \
- (!(av_pix_fmt_descriptors[x].flags & AV_PIX_FMT_FLAG_PAL) && \
- av_pix_fmt_descriptors[x].nb_components <= 2)
+ (!(av_pix_fmt_desc_get(x)->flags & AV_PIX_FMT_FLAG_PAL) && \
+ av_pix_fmt_desc_get(x)->nb_components <= 2)
#else
#define isGray(x) \
((x) == AV_PIX_FMT_GRAY8 || \
@@ -613,8 +677,9 @@ static av_always_inline int isRGB(enum AVPixelFormat pix_fmt)
(x) == AV_PIX_FMT_YA16LE)
#endif
-#define isRGBinInt(x) \
- ((x) == AV_PIX_FMT_RGB48BE || \
+#define isRGBinInt(x) \
+ ( \
+ (x) == AV_PIX_FMT_RGB48BE || \
(x) == AV_PIX_FMT_RGB48LE || \
(x) == AV_PIX_FMT_RGB32 || \
(x) == AV_PIX_FMT_RGB32_1 || \
@@ -631,10 +696,11 @@ static av_always_inline int isRGB(enum AVPixelFormat pix_fmt)
(x) == AV_PIX_FMT_RGBA64BE || \
(x) == AV_PIX_FMT_RGBA64LE || \
(x) == AV_PIX_FMT_MONOBLACK || \
- (x) == AV_PIX_FMT_MONOWHITE)
-
-#define isBGRinInt(x) \
- ((x) == AV_PIX_FMT_BGR48BE || \
+ (x) == AV_PIX_FMT_MONOWHITE \
+ )
+#define isBGRinInt(x) \
+ ( \
+ (x) == AV_PIX_FMT_BGR48BE || \
(x) == AV_PIX_FMT_BGR48LE || \
(x) == AV_PIX_FMT_BGR32 || \
(x) == AV_PIX_FMT_BGR32_1 || \
@@ -651,19 +717,73 @@ static av_always_inline int isRGB(enum AVPixelFormat pix_fmt)
(x) == AV_PIX_FMT_BGRA64BE || \
(x) == AV_PIX_FMT_BGRA64LE || \
(x) == AV_PIX_FMT_MONOBLACK || \
- (x) == AV_PIX_FMT_MONOWHITE)
-
-#define isAnyRGB(x) \
- (isRGBinInt(x) || \
- isBGRinInt(x))
+ (x) == AV_PIX_FMT_MONOWHITE \
+ )
+
+#define isRGBinBytes(x) ( \
+ (x) == AV_PIX_FMT_RGB48BE \
+ || (x) == AV_PIX_FMT_RGB48LE \
+ || (x) == AV_PIX_FMT_RGBA64BE \
+ || (x) == AV_PIX_FMT_RGBA64LE \
+ || (x) == AV_PIX_FMT_RGBA \
+ || (x) == AV_PIX_FMT_ARGB \
+ || (x) == AV_PIX_FMT_RGB24 \
+ )
+#define isBGRinBytes(x) ( \
+ (x) == AV_PIX_FMT_BGR48BE \
+ || (x) == AV_PIX_FMT_BGR48LE \
+ || (x) == AV_PIX_FMT_BGRA64BE \
+ || (x) == AV_PIX_FMT_BGRA64LE \
+ || (x) == AV_PIX_FMT_BGRA \
+ || (x) == AV_PIX_FMT_ABGR \
+ || (x) == AV_PIX_FMT_BGR24 \
+ )
+
+#define isBayer(x) ( \
+ (x)==AV_PIX_FMT_BAYER_BGGR8 \
+ || (x)==AV_PIX_FMT_BAYER_BGGR16LE \
+ || (x)==AV_PIX_FMT_BAYER_BGGR16BE \
+ || (x)==AV_PIX_FMT_BAYER_RGGB8 \
+ || (x)==AV_PIX_FMT_BAYER_RGGB16LE \
+ || (x)==AV_PIX_FMT_BAYER_RGGB16BE \
+ || (x)==AV_PIX_FMT_BAYER_GBRG8 \
+ || (x)==AV_PIX_FMT_BAYER_GBRG16LE \
+ || (x)==AV_PIX_FMT_BAYER_GBRG16BE \
+ || (x)==AV_PIX_FMT_BAYER_GRBG8 \
+ || (x)==AV_PIX_FMT_BAYER_GRBG16LE \
+ || (x)==AV_PIX_FMT_BAYER_GRBG16BE \
+ )
+
+#define isAnyRGB(x) \
+ ( \
+ isBayer(x) || \
+ isRGBinInt(x) || \
+ isBGRinInt(x) || \
+ isRGB(x) \
+ )
static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
av_assert0(desc);
- return desc->nb_components == 2 || desc->nb_components == 4;
+ if (pix_fmt == AV_PIX_FMT_PAL8)
+ return 1;
+ return desc->flags & AV_PIX_FMT_FLAG_ALPHA;
}
+#if 1
+#define isPacked(x) ( \
+ (x)==AV_PIX_FMT_PAL8 \
+ || (x)==AV_PIX_FMT_YUYV422 \
+ || (x)==AV_PIX_FMT_YVYU422 \
+ || (x)==AV_PIX_FMT_UYVY422 \
+ || (x)==AV_PIX_FMT_YA8 \
+ || (x)==AV_PIX_FMT_YA16LE \
+ || (x)==AV_PIX_FMT_YA16BE \
+ || isRGBinInt(x) \
+ || isBGRinInt(x) \
+ )
+#else
static av_always_inline int isPacked(enum AVPixelFormat pix_fmt)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -672,6 +792,7 @@ static av_always_inline int isPacked(enum AVPixelFormat pix_fmt)
pix_fmt == AV_PIX_FMT_PAL8);
}
+#endif
static av_always_inline int isPlanar(enum AVPixelFormat pix_fmt)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -698,18 +819,19 @@ static av_always_inline int usePal(enum AVPixelFormat pix_fmt)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
av_assert0(desc);
- return ((desc->flags & AV_PIX_FMT_FLAG_PAL) || (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) ||
- pix_fmt == AV_PIX_FMT_YA8);
+ return (desc->flags & AV_PIX_FMT_FLAG_PAL) || (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL);
}
extern const uint64_t ff_dither4[2];
extern const uint64_t ff_dither8[2];
-extern const uint8_t ff_dither_4x4_16[4][8];
-extern const uint8_t ff_dither_8x8_32[8][8];
-extern const uint8_t ff_dither_8x8_73[8][8];
-extern const uint8_t ff_dither_8x8_128[8][8];
-extern const uint8_t ff_dither_8x8_220[8][8];
+extern const uint8_t ff_dither_2x2_4[3][8];
+extern const uint8_t ff_dither_2x2_8[3][8];
+extern const uint8_t ff_dither_4x4_16[5][8];
+extern const uint8_t ff_dither_8x8_32[9][8];
+extern const uint8_t ff_dither_8x8_73[9][8];
+extern const uint8_t ff_dither_8x8_128[9][8];
+extern const uint8_t ff_dither_8x8_220[9][8];
extern const int32_t ff_yuv2rgb_coeffs[8][4];
@@ -721,6 +843,7 @@ extern const AVClass sws_context_class;
*/
void ff_get_unscaled_swscale(SwsContext *c);
void ff_get_unscaled_swscale_ppc(SwsContext *c);
+void ff_get_unscaled_swscale_arm(SwsContext *c);
/**
* Return function pointer to fastest main scaler path function depending
@@ -740,4 +863,39 @@ void ff_sws_init_output_funcs(SwsContext *c,
void ff_sws_init_swscale_ppc(SwsContext *c);
void ff_sws_init_swscale_x86(SwsContext *c);
+void ff_hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
+ const uint8_t *src, int srcW, int xInc);
+void ff_hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
+ int dstWidth, const uint8_t *src1,
+ const uint8_t *src2, int srcW, int xInc);
+int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
+ int16_t *filter, int32_t *filterPos,
+ int numSplits);
+void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
+ int dstWidth, const uint8_t *src,
+ int srcW, int xInc);
+void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
+ int dstWidth, const uint8_t *src1,
+ const uint8_t *src2, int srcW, int xInc);
+
+static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y,
+ int alpha, int bits, const int big_endian)
+{
+ int i, j;
+ uint8_t *ptr = plane + stride * y;
+ int v = alpha ? 0xFFFF>>(15-bits) : (1<<bits);
+ for (i = 0; i < height; i++) {
+#define FILL(wfunc) \
+ for (j = 0; j < width; j++) {\
+ wfunc(ptr+2*j, v);\
+ }
+ if (big_endian) {
+ FILL(AV_WB16);
+ } else {
+ FILL(AV_WL16);
+ }
+ ptr += stride;
+ }
+}
+
#endif /* SWSCALE_SWSCALE_INTERNAL_H */
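
A worked example for the fillPlane16() helper above: the call site in swscale() passes bits = depth_minus1, so for a 10-bit format bits = 9 and v = 0xFFFF >> (15 - 9) = 0x03FF, the 10-bit maximum. A minimal caller, assuming this header is included:

    /* fill a 10-bit big-endian alpha plane with opaque (0x03FF) */
    static void fill_alpha_10be(uint8_t *plane, int stride, int width, int height)
    {
        fillPlane16(plane, stride, width, height, 0 /* y */,
                    1 /* alpha */, 9 /* bits = depth_minus1 */, 1 /* big_endian */);
    }
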
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index ffc813e..da457df 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -23,7 +23,6 @@
#include <math.h>
#include <stdio.h>
#include "config.h"
-#include <assert.h>
#include "swscale.h"
#include "swscale_internal.h"
#include "rgb2rgb.h"
@@ -33,58 +32,101 @@
#include "libavutil/mathematics.h"
#include "libavutil/bswap.h"
#include "libavutil/pixdesc.h"
+#include "libavutil/avassert.h"
-DECLARE_ALIGNED(8, static const uint8_t, dither_8x8_1)[8][8] = {
- { 0, 1, 0, 1, 0, 1, 0, 1,},
- { 1, 0, 1, 0, 1, 0, 1, 0,},
- { 0, 1, 0, 1, 0, 1, 0, 1,},
- { 1, 0, 1, 0, 1, 0, 1, 0,},
- { 0, 1, 0, 1, 0, 1, 0, 1,},
- { 1, 0, 1, 0, 1, 0, 1, 0,},
- { 0, 1, 0, 1, 0, 1, 0, 1,},
- { 1, 0, 1, 0, 1, 0, 1, 0,},
-};
-DECLARE_ALIGNED(8, static const uint8_t, dither_8x8_3)[8][8] = {
- { 1, 2, 1, 2, 1, 2, 1, 2,},
- { 3, 0, 3, 0, 3, 0, 3, 0,},
- { 1, 2, 1, 2, 1, 2, 1, 2,},
- { 3, 0, 3, 0, 3, 0, 3, 0,},
- { 1, 2, 1, 2, 1, 2, 1, 2,},
- { 3, 0, 3, 0, 3, 0, 3, 0,},
- { 1, 2, 1, 2, 1, 2, 1, 2,},
- { 3, 0, 3, 0, 3, 0, 3, 0,},
-};
-DECLARE_ALIGNED(8, static const uint8_t, dither_8x8_64)[8][8] = {
- { 18, 34, 30, 46, 17, 33, 29, 45,},
- { 50, 2, 62, 14, 49, 1, 61, 13,},
- { 26, 42, 22, 38, 25, 41, 21, 37,},
- { 58, 10, 54, 6, 57, 9, 53, 5,},
- { 16, 32, 28, 44, 19, 35, 31, 47,},
- { 48, 0, 60, 12, 51, 3, 63, 15,},
- { 24, 40, 20, 36, 27, 43, 23, 39,},
- { 56, 8, 52, 4, 59, 11, 55, 7,},
-};
-DECLARE_ALIGNED(8, static const uint8_t, dither_8x8_256)[8][8] = {
- { 72, 136, 120, 184, 68, 132, 116, 180,},
- { 200, 8, 248, 56, 196, 4, 244, 52,},
- { 104, 168, 88, 152, 100, 164, 84, 148,},
- { 232, 40, 216, 24, 228, 36, 212, 20,},
- { 64, 128, 102, 176, 76, 140, 124, 188,},
- { 192, 0, 240, 48, 204, 12, 252, 60,},
- { 96, 160, 80, 144, 108, 172, 92, 156,},
- { 224, 32, 208, 16, 236, 44, 220, 28,},
+DECLARE_ALIGNED(8, static const uint8_t, dithers)[8][8][8]={
+{
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+},{
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+},{
+ { 2, 4, 3, 5, 2, 4, 3, 5,},
+ { 6, 0, 7, 1, 6, 0, 7, 1,},
+ { 3, 5, 2, 4, 3, 5, 2, 4,},
+ { 7, 1, 6, 0, 7, 1, 6, 0,},
+ { 2, 4, 3, 5, 2, 4, 3, 5,},
+ { 6, 0, 7, 1, 6, 0, 7, 1,},
+ { 3, 5, 2, 4, 3, 5, 2, 4,},
+ { 7, 1, 6, 0, 7, 1, 6, 0,},
+},{
+ { 4, 8, 7, 11, 4, 8, 7, 11,},
+ { 12, 0, 15, 3, 12, 0, 15, 3,},
+ { 6, 10, 5, 9, 6, 10, 5, 9,},
+ { 14, 2, 13, 1, 14, 2, 13, 1,},
+ { 4, 8, 7, 11, 4, 8, 7, 11,},
+ { 12, 0, 15, 3, 12, 0, 15, 3,},
+ { 6, 10, 5, 9, 6, 10, 5, 9,},
+ { 14, 2, 13, 1, 14, 2, 13, 1,},
+},{
+ { 9, 17, 15, 23, 8, 16, 14, 22,},
+ { 25, 1, 31, 7, 24, 0, 30, 6,},
+ { 13, 21, 11, 19, 12, 20, 10, 18,},
+ { 29, 5, 27, 3, 28, 4, 26, 2,},
+ { 8, 16, 14, 22, 9, 17, 15, 23,},
+ { 24, 0, 30, 6, 25, 1, 31, 7,},
+ { 12, 20, 10, 18, 13, 21, 11, 19,},
+ { 28, 4, 26, 2, 29, 5, 27, 3,},
+},{
+ { 18, 34, 30, 46, 17, 33, 29, 45,},
+ { 50, 2, 62, 14, 49, 1, 61, 13,},
+ { 26, 42, 22, 38, 25, 41, 21, 37,},
+ { 58, 10, 54, 6, 57, 9, 53, 5,},
+ { 16, 32, 28, 44, 19, 35, 31, 47,},
+ { 48, 0, 60, 12, 51, 3, 63, 15,},
+ { 24, 40, 20, 36, 27, 43, 23, 39,},
+ { 56, 8, 52, 4, 59, 11, 55, 7,},
+},{
+ { 18, 34, 30, 46, 17, 33, 29, 45,},
+ { 50, 2, 62, 14, 49, 1, 61, 13,},
+ { 26, 42, 22, 38, 25, 41, 21, 37,},
+ { 58, 10, 54, 6, 57, 9, 53, 5,},
+ { 16, 32, 28, 44, 19, 35, 31, 47,},
+ { 48, 0, 60, 12, 51, 3, 63, 15,},
+ { 24, 40, 20, 36, 27, 43, 23, 39,},
+ { 56, 8, 52, 4, 59, 11, 55, 7,},
+},{
+ { 36, 68, 60, 92, 34, 66, 58, 90,},
+ { 100, 4,124, 28, 98, 2,122, 26,},
+ { 52, 84, 44, 76, 50, 82, 42, 74,},
+ { 116, 20,108, 12,114, 18,106, 10,},
+ { 32, 64, 56, 88, 38, 70, 62, 94,},
+ { 96, 0,120, 24,102, 6,126, 30,},
+ { 48, 80, 40, 72, 54, 86, 46, 78,},
+ { 112, 16,104, 8,118, 22,110, 14,},
+}};
+
+static const uint16_t dither_scale[15][16]={
+{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
+{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
+{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
+{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
+{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
+{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
+{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
+{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
+{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
+{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
+{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
+{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
+{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
+{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
+{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
};
-#define RGB2YUV_SHIFT 15
-#define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define BU ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GY ( (int) (0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GV (-(int) (0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define GU (-(int) (0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RY ( (int) (0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
-#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
uint8_t val)
@@ -97,27 +139,6 @@ static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
}
}
-static void fill_plane9or10(uint8_t *plane, int stride, int width,
- int height, int y, uint8_t val,
- const int dst_depth, const int big_endian)
-{
- int i, j;
- uint16_t *dst = (uint16_t *) (plane + stride * y);
-#define FILL8TO9_OR_10(wfunc) \
- for (i = 0; i < height; i++) { \
- for (j = 0; j < width; j++) { \
- wfunc(&dst[j], (val << (dst_depth - 8)) | \
- (val >> (16 - dst_depth))); \
- } \
- dst += stride / 2; \
- }
- if (big_endian) {
- FILL8TO9_OR_10(AV_WB16);
- } else {
- FILL8TO9_OR_10(AV_WL16);
- }
-}
-
static void copyPlane(const uint8_t *src, int srcStride,
int srcSliceY, int srcSliceH, int width,
uint8_t *dst, int dstStride)
@@ -321,19 +342,23 @@ static int packed_16bpc_bswap(SwsContext *c, const uint8_t *src[],
int srcStride[], int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int i, j;
- int srcstr = srcStride[0] >> 1;
- int dststr = dstStride[0] >> 1;
- uint16_t *dstPtr = (uint16_t *) dst[0];
- const uint16_t *srcPtr = (const uint16_t *) src[0];
- int min_stride = FFMIN(srcstr, dststr);
-
- for (i = 0; i < srcSliceH; i++) {
- for (j = 0; j < min_stride; j++) {
- dstPtr[j] = av_bswap16(srcPtr[j]);
+ int i, j, p;
+
+ for (p = 0; p < 4; p++) {
+ int srcstr = srcStride[p] / 2;
+ int dststr = dstStride[p] / 2;
+ uint16_t *dstPtr = (uint16_t *) dst[p];
+ const uint16_t *srcPtr = (const uint16_t *) src[p];
+ int min_stride = FFMIN(FFABS(srcstr), FFABS(dststr));
+ if(!dstPtr || !srcPtr)
+ continue;
+ for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) {
+ for (j = 0; j < min_stride; j++) {
+ dstPtr[j] = av_bswap16(srcPtr[j]);
+ }
+ srcPtr += srcstr;
+ dstPtr += dststr;
}
- srcPtr += srcstr;
- dstPtr += dststr;
}
return srcSliceH;
@@ -373,7 +398,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
if (!conv)
av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
- sws_format_name(srcFormat), sws_format_name(dstFormat));
+ av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
else {
for (i = 0; i < srcSliceH; i++) {
conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
@@ -385,6 +410,371 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
return srcSliceH;
}
+static void packed16togbra16(const uint8_t *src, int srcStride,
+ uint16_t *dst[], int dstStride[], int srcSliceH,
+ int src_alpha, int swap, int shift, int width)
+{
+ int x, h, i;
+ int dst_alpha = dst[3] != NULL;
+ for (h = 0; h < srcSliceH; h++) {
+ uint16_t *src_line = (uint16_t *)(src + srcStride * h);
+ switch (swap) {
+ case 3:
+ if (src_alpha && dst_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[1][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[2][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[3][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ }
+ } else if (dst_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[1][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[2][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[3][x] = 0xFFFF;
+ }
+ } else if (src_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[1][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[2][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ src_line++;
+ }
+ } else {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[1][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ dst[2][x] = av_bswap16(av_bswap16(*src_line++) >> shift);
+ }
+ }
+ break;
+ case 2:
+ if (src_alpha && dst_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(*src_line++ >> shift);
+ dst[1][x] = av_bswap16(*src_line++ >> shift);
+ dst[2][x] = av_bswap16(*src_line++ >> shift);
+ dst[3][x] = av_bswap16(*src_line++ >> shift);
+ }
+ } else if (dst_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(*src_line++ >> shift);
+ dst[1][x] = av_bswap16(*src_line++ >> shift);
+ dst[2][x] = av_bswap16(*src_line++ >> shift);
+ dst[3][x] = 0xFFFF;
+ }
+ } else if (src_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(*src_line++ >> shift);
+ dst[1][x] = av_bswap16(*src_line++ >> shift);
+ dst[2][x] = av_bswap16(*src_line++ >> shift);
+ src_line++;
+ }
+ } else {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(*src_line++ >> shift);
+ dst[1][x] = av_bswap16(*src_line++ >> shift);
+ dst[2][x] = av_bswap16(*src_line++ >> shift);
+ }
+ }
+ break;
+ case 1:
+ if (src_alpha && dst_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(*src_line++) >> shift;
+ dst[1][x] = av_bswap16(*src_line++) >> shift;
+ dst[2][x] = av_bswap16(*src_line++) >> shift;
+ dst[3][x] = av_bswap16(*src_line++) >> shift;
+ }
+ } else if (dst_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(*src_line++) >> shift;
+ dst[1][x] = av_bswap16(*src_line++) >> shift;
+ dst[2][x] = av_bswap16(*src_line++) >> shift;
+ dst[3][x] = 0xFFFF;
+ }
+ } else if (src_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(*src_line++) >> shift;
+ dst[1][x] = av_bswap16(*src_line++) >> shift;
+ dst[2][x] = av_bswap16(*src_line++) >> shift;
+ src_line++;
+ }
+ } else {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = av_bswap16(*src_line++) >> shift;
+ dst[1][x] = av_bswap16(*src_line++) >> shift;
+ dst[2][x] = av_bswap16(*src_line++) >> shift;
+ }
+ }
+ break;
+ default:
+ if (src_alpha && dst_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = *src_line++ >> shift;
+ dst[1][x] = *src_line++ >> shift;
+ dst[2][x] = *src_line++ >> shift;
+ dst[3][x] = *src_line++ >> shift;
+ }
+ } else if (dst_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = *src_line++ >> shift;
+ dst[1][x] = *src_line++ >> shift;
+ dst[2][x] = *src_line++ >> shift;
+ dst[3][x] = 0xFFFF;
+ }
+ } else if (src_alpha) {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = *src_line++ >> shift;
+ dst[1][x] = *src_line++ >> shift;
+ dst[2][x] = *src_line++ >> shift;
+ src_line++;
+ }
+ } else {
+ for (x = 0; x < width; x++) {
+ dst[0][x] = *src_line++ >> shift;
+ dst[1][x] = *src_line++ >> shift;
+ dst[2][x] = *src_line++ >> shift;
+ }
+ }
+ }
+ for (i = 0; i < 4; i++)
+ dst[i] += dstStride[i] >> 1;
+ }
+}
+
+static int Rgb16ToPlanarRgb16Wrapper(SwsContext *c, const uint8_t *src[],
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ uint16_t *dst2013[] = { (uint16_t *)dst[2], (uint16_t *)dst[0], (uint16_t *)dst[1], (uint16_t *)dst[3] };
+ uint16_t *dst1023[] = { (uint16_t *)dst[1], (uint16_t *)dst[0], (uint16_t *)dst[2], (uint16_t *)dst[3] };
+ int stride2013[] = { dstStride[2], dstStride[0], dstStride[1], dstStride[3] };
+ int stride1023[] = { dstStride[1], dstStride[0], dstStride[2], dstStride[3] };
+ const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat);
+ const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat);
+ int bpc = dst_format->comp[0].depth_minus1 + 1;
+ int alpha = src_format->flags & AV_PIX_FMT_FLAG_ALPHA;
+ int swap = 0;
+ if ( HAVE_BIGENDIAN && !(src_format->flags & AV_PIX_FMT_FLAG_BE) ||
+ !HAVE_BIGENDIAN && src_format->flags & AV_PIX_FMT_FLAG_BE)
+ swap++;
+ if ( HAVE_BIGENDIAN && !(dst_format->flags & AV_PIX_FMT_FLAG_BE) ||
+ !HAVE_BIGENDIAN && dst_format->flags & AV_PIX_FMT_FLAG_BE)
+ swap += 2;
+
+ if ((dst_format->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) !=
+ (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB) || bpc < 9) {
+ av_log(c, AV_LOG_ERROR, "unsupported conversion to planar RGB %s -> %s\n",
+ src_format->name, dst_format->name);
+ return srcSliceH;
+ }
+ switch (c->srcFormat) {
+ case AV_PIX_FMT_RGB48LE:
+ case AV_PIX_FMT_RGB48BE:
+ case AV_PIX_FMT_RGBA64LE:
+ case AV_PIX_FMT_RGBA64BE:
+ packed16togbra16(src[0] + srcSliceY * srcStride[0], srcStride[0],
+ dst2013, stride2013, srcSliceH, alpha, swap,
+ 16 - bpc, c->srcW);
+ break;
+ case AV_PIX_FMT_BGR48LE:
+ case AV_PIX_FMT_BGR48BE:
+ case AV_PIX_FMT_BGRA64LE:
+ case AV_PIX_FMT_BGRA64BE:
+ packed16togbra16(src[0] + srcSliceY * srcStride[0], srcStride[0],
+ dst1023, stride1023, srcSliceH, alpha, swap,
+ 16 - bpc, c->srcW);
+ break;
+ default:
+ av_log(c, AV_LOG_ERROR,
+ "unsupported conversion to planar RGB %s -> %s\n",
+ src_format->name, dst_format->name);
+ }
+
+ return srcSliceH;
+}
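
A note on the swap encoding computed in the wrapper above: bit 0 is set when the source endianness differs from the host's, bit 1 when the destination's does. On a little-endian host, RGB48BE input to a native planar format therefore yields swap == 1 (byte-swap each component to native order, then shift down to the destination depth); swap == 2 is the mirror case, and swap == 3 swaps on both sides.
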
+
+static void gbr16ptopacked16(const uint16_t *src[], int srcStride[],
+ uint8_t *dst, int dstStride, int srcSliceH,
+ int alpha, int swap, int bpp, int width)
+{
+ int x, h, i;
+ int src_alpha = src[3] != NULL;
+ int scale_high = 16 - bpp, scale_low = (bpp - 8) * 2;
+ for (h = 0; h < srcSliceH; h++) {
+ uint16_t *dest = (uint16_t *)(dst + dstStride * h);
+ uint16_t component;
+
+ switch(swap) {
+ case 3:
+ if (alpha && !src_alpha) {
+ for (x = 0; x < width; x++) {
+ component = av_bswap16(src[0][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ component = av_bswap16(src[1][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ component = av_bswap16(src[2][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ *dest++ = 0xffff;
+ }
+ } else if (alpha && src_alpha) {
+ for (x = 0; x < width; x++) {
+ component = av_bswap16(src[0][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ component = av_bswap16(src[1][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ component = av_bswap16(src[2][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ component = av_bswap16(src[3][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ }
+ } else {
+ for (x = 0; x < width; x++) {
+ component = av_bswap16(src[0][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ component = av_bswap16(src[1][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ component = av_bswap16(src[2][x]);
+ *dest++ = av_bswap16(component << scale_high | component >> scale_low);
+ }
+ }
+ break;
+ case 2:
+ if (alpha && !src_alpha) {
+ for (x = 0; x < width; x++) {
+ *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low);
+ *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low);
+ *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low);
+ *dest++ = 0xffff;
+ }
+ } else if (alpha && src_alpha) {
+ for (x = 0; x < width; x++) {
+ *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low);
+ *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low);
+ *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low);
+ *dest++ = av_bswap16(src[3][x] << scale_high | src[3][x] >> scale_low);
+ }
+ } else {
+ for (x = 0; x < width; x++) {
+ *dest++ = av_bswap16(src[0][x] << scale_high | src[0][x] >> scale_low);
+ *dest++ = av_bswap16(src[1][x] << scale_high | src[1][x] >> scale_low);
+ *dest++ = av_bswap16(src[2][x] << scale_high | src[2][x] >> scale_low);
+ }
+ }
+ break;
+ case 1:
+ if (alpha && !src_alpha) {
+ for (x = 0; x < width; x++) {
+ *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low;
+ *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low;
+ *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low;
+ *dest++ = 0xffff;
+ }
+ } else if (alpha && src_alpha) {
+ for (x = 0; x < width; x++) {
+ *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low;
+ *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low;
+ *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low;
+ *dest++ = av_bswap16(src[3][x]) << scale_high | av_bswap16(src[3][x]) >> scale_low;
+ }
+ } else {
+ for (x = 0; x < width; x++) {
+ *dest++ = av_bswap16(src[0][x]) << scale_high | av_bswap16(src[0][x]) >> scale_low;
+ *dest++ = av_bswap16(src[1][x]) << scale_high | av_bswap16(src[1][x]) >> scale_low;
+ *dest++ = av_bswap16(src[2][x]) << scale_high | av_bswap16(src[2][x]) >> scale_low;
+ }
+ }
+ break;
+ default:
+ if (alpha && !src_alpha) {
+ for (x = 0; x < width; x++) {
+ *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low;
+ *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low;
+ *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low;
+ *dest++ = 0xffff;
+ }
+ } else if (alpha && src_alpha) {
+ for (x = 0; x < width; x++) {
+ *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low;
+ *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low;
+ *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low;
+ *dest++ = src[3][x] << scale_high | src[3][x] >> scale_low;
+ }
+ } else {
+ for (x = 0; x < width; x++) {
+ *dest++ = src[0][x] << scale_high | src[0][x] >> scale_low;
+ *dest++ = src[1][x] << scale_high | src[1][x] >> scale_low;
+ *dest++ = src[2][x] << scale_high | src[2][x] >> scale_low;
+ }
+ }
+ }
+ for (i = 0; i < 3 + src_alpha; i++)
+ src[i] += srcStride[i] >> 1;
+ }
+}
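+/* Reading aid for the four cases above (a sketch, not part of the patch):
+ * bit 0 of "swap" is set when the 16-bit source components need a byte
+ * swap, bit 1 when the destination does, so case 3 swaps on both ends.
+ * Widening a bpp-bit component to the full 16-bit range uses bit
+ * replication rather than a plain shift:
+ *
+ *     static inline uint16_t widen16(uint16_t v, int bpp)
+ *     {   // 8 < bpp <= 16; scale_high = 16 - bpp, scale_low = (bpp - 8) * 2
+ *         return (v << (16 - bpp)) | (v >> ((bpp - 8) * 2));
+ *     }
+ *
+ * e.g. for bpp == 10, 0x3FF becomes (0x3FF << 6) | (0x3FF >> 4) == 0xFFFF,
+ * so full scale maps to full scale rather than 0xFFC0. */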
+
+static int planarRgb16ToRgb16Wrapper(SwsContext *c, const uint8_t *src[],
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ const uint16_t *src102[] = { (uint16_t *)src[1], (uint16_t *)src[0], (uint16_t *)src[2], (uint16_t *)src[3] };
+ const uint16_t *src201[] = { (uint16_t *)src[2], (uint16_t *)src[0], (uint16_t *)src[1], (uint16_t *)src[3] };
+ int stride102[] = { srcStride[1], srcStride[0], srcStride[2], srcStride[3] };
+ int stride201[] = { srcStride[2], srcStride[0], srcStride[1], srcStride[3] };
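+    /* GBR planar layout stores G in plane 0, B in plane 1 and R in plane 2,
+     * so src102 presents the planes as B,G,R for BGR-packed destinations
+     * and src201 as R,G,B for RGB-packed ones. */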
+ const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat);
+ const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat);
+ int bits_per_sample = src_format->comp[0].depth_minus1 + 1;
+ int swap = 0;
+ if ( HAVE_BIGENDIAN && !(src_format->flags & AV_PIX_FMT_FLAG_BE) ||
+ !HAVE_BIGENDIAN && src_format->flags & AV_PIX_FMT_FLAG_BE)
+ swap++;
+ if ( HAVE_BIGENDIAN && !(dst_format->flags & AV_PIX_FMT_FLAG_BE) ||
+ !HAVE_BIGENDIAN && dst_format->flags & AV_PIX_FMT_FLAG_BE)
+ swap += 2;
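+    /* swap is thus a 2-bit mask: bit 0 = source not in native endianness,
+     * bit 1 = destination not in native endianness; gbr16ptopacked16 above
+     * dispatches on the four resulting combinations. */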
+
+ if ((src_format->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) !=
+ (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB) ||
+ bits_per_sample <= 8) {
+ av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n",
+ src_format->name, dst_format->name);
+ return srcSliceH;
+ }
+ switch (c->dstFormat) {
+ case AV_PIX_FMT_BGR48LE:
+ case AV_PIX_FMT_BGR48BE:
+ gbr16ptopacked16(src102, stride102,
+ dst[0] + srcSliceY * dstStride[0], dstStride[0],
+ srcSliceH, 0, swap, bits_per_sample, c->srcW);
+ break;
+ case AV_PIX_FMT_RGB48LE:
+ case AV_PIX_FMT_RGB48BE:
+ gbr16ptopacked16(src201, stride201,
+ dst[0] + srcSliceY * dstStride[0], dstStride[0],
+ srcSliceH, 0, swap, bits_per_sample, c->srcW);
+ break;
+ case AV_PIX_FMT_RGBA64LE:
+ case AV_PIX_FMT_RGBA64BE:
+ gbr16ptopacked16(src201, stride201,
+ dst[0] + srcSliceY * dstStride[0], dstStride[0],
+ srcSliceH, 1, swap, bits_per_sample, c->srcW);
+ break;
+ case AV_PIX_FMT_BGRA64LE:
+ case AV_PIX_FMT_BGRA64BE:
+ gbr16ptopacked16(src102, stride102,
+ dst[0] + srcSliceY * dstStride[0], dstStride[0],
+ srcSliceH, 1, swap, bits_per_sample, c->srcW);
+ break;
+ default:
+ av_log(c, AV_LOG_ERROR,
+ "unsupported planar RGB conversion %s -> %s\n",
+ src_format->name, dst_format->name);
+ }
+
+ return srcSliceH;
+}
+
static void gbr24ptopacked24(const uint8_t *src[], int srcStride[],
uint8_t *dst, int dstStride, int srcSliceH,
int width)
@@ -488,6 +878,22 @@ static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t *src[],
return srcSliceH;
}
+static int planarRgbToplanarRgbWrapper(SwsContext *c, const uint8_t *src[],
+ int srcStride[], int srcSliceY, int srcSliceH,
+ uint8_t *dst[], int dstStride[])
+{
+ copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
+ dst[0], dstStride[0]);
+ copyPlane(src[1], srcStride[1], srcSliceY, srcSliceH, c->srcW,
+ dst[1], dstStride[1]);
+ copyPlane(src[2], srcStride[2], srcSliceY, srcSliceH, c->srcW,
+ dst[2], dstStride[2]);
+ if (dst[3])
+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
+
+ return srcSliceH;
+}
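+/* Used for the GBRP <-> GBRAP pairs hooked up in ff_get_unscaled_swscale()
+ * below: the three color planes are copied as-is and, when the destination
+ * has an alpha plane the source lacks, it is filled fully opaque (255). */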
+
static void packedtogbr24p(const uint8_t *src, int srcStride,
uint8_t *dst[], int dstStride[], int srcSliceH,
int alpha_first, int inc_size, int width)
@@ -562,6 +968,160 @@ static int rgbToPlanarRgbWrapper(SwsContext *c, const uint8_t *src[],
return srcSliceH;
}
+#define BAYER_GBRG
+#define BAYER_8
+#define BAYER_RENAME(x) bayer_gbrg8_to_##x
+#include "bayer_template.c"
+
+#define BAYER_GBRG
+#define BAYER_16LE
+#define BAYER_RENAME(x) bayer_gbrg16le_to_##x
+#include "bayer_template.c"
+
+#define BAYER_GBRG
+#define BAYER_16BE
+#define BAYER_RENAME(x) bayer_gbrg16be_to_##x
+#include "bayer_template.c"
+
+#define BAYER_GRBG
+#define BAYER_8
+#define BAYER_RENAME(x) bayer_grbg8_to_##x
+#include "bayer_template.c"
+
+#define BAYER_GRBG
+#define BAYER_16LE
+#define BAYER_RENAME(x) bayer_grbg16le_to_##x
+#include "bayer_template.c"
+
+#define BAYER_GRBG
+#define BAYER_16BE
+#define BAYER_RENAME(x) bayer_grbg16be_to_##x
+#include "bayer_template.c"
+
+#define BAYER_BGGR
+#define BAYER_8
+#define BAYER_RENAME(x) bayer_bggr8_to_##x
+#include "bayer_template.c"
+
+#define BAYER_BGGR
+#define BAYER_16LE
+#define BAYER_RENAME(x) bayer_bggr16le_to_##x
+#include "bayer_template.c"
+
+#define BAYER_BGGR
+#define BAYER_16BE
+#define BAYER_RENAME(x) bayer_bggr16be_to_##x
+#include "bayer_template.c"
+
+#define BAYER_RGGB
+#define BAYER_8
+#define BAYER_RENAME(x) bayer_rggb8_to_##x
+#include "bayer_template.c"
+
+#define BAYER_RGGB
+#define BAYER_16LE
+#define BAYER_RENAME(x) bayer_rggb16le_to_##x
+#include "bayer_template.c"
+
+#define BAYER_RGGB
+#define BAYER_16BE
+#define BAYER_RENAME(x) bayer_rggb16be_to_##x
+#include "bayer_template.c"
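+/* The repeated includes stamp out one specialization of bayer_template.c per
+ * pattern and bit depth; the template consumes and #undefs its BAYER_*
+ * configuration macros on each pass.  Roughly (bayer_template.c itself is
+ * not part of this hunk, so this is a sketch):
+ *
+ *     // bayer_template.c, per inclusion
+ *     static void BAYER_RENAME(rgb24_copy)(const uint8_t *src, int src_stride,
+ *                                          uint8_t *dst, int dst_stride,
+ *                                          int width) { ... }
+ *     #undef BAYER_RENAME   // plus BAYER_8 / BAYER_16LE / BAYER_16BE etc.
+ *
+ * so BAYER_RENAME(rgb24_copy) with the last block above expands to
+ * bayer_rggb16be_to_rgb24_copy, matching the names the wrappers below use. */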
+
+static int bayer_to_rgb24_wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+ uint8_t *dstPtr= dst[0];
+ const uint8_t *srcPtr= src[0];
+ int i;
+ void (*copy) (const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width);
+ void (*interpolate)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int width);
+
+ switch(c->srcFormat) {
+#define CASE(pixfmt, prefix) \
+ case pixfmt: copy = bayer_##prefix##_to_rgb24_copy; \
+ interpolate = bayer_##prefix##_to_rgb24_interpolate; \
+ break;
+ CASE(AV_PIX_FMT_BAYER_BGGR8, bggr8)
+ CASE(AV_PIX_FMT_BAYER_BGGR16LE, bggr16le)
+ CASE(AV_PIX_FMT_BAYER_BGGR16BE, bggr16be)
+ CASE(AV_PIX_FMT_BAYER_RGGB8, rggb8)
+ CASE(AV_PIX_FMT_BAYER_RGGB16LE, rggb16le)
+ CASE(AV_PIX_FMT_BAYER_RGGB16BE, rggb16be)
+ CASE(AV_PIX_FMT_BAYER_GBRG8, gbrg8)
+ CASE(AV_PIX_FMT_BAYER_GBRG16LE, gbrg16le)
+ CASE(AV_PIX_FMT_BAYER_GBRG16BE, gbrg16be)
+ CASE(AV_PIX_FMT_BAYER_GRBG8, grbg8)
+ CASE(AV_PIX_FMT_BAYER_GRBG16LE, grbg16le)
+ CASE(AV_PIX_FMT_BAYER_GRBG16BE, grbg16be)
+#undef CASE
+ default: return 0;
+ }
+
+ copy(srcPtr, srcStride[0], dstPtr, dstStride[0], c->srcW);
+ srcPtr += 2 * srcStride[0];
+ dstPtr += 2 * dstStride[0];
+
+ for (i = 2; i < srcSliceH - 2; i += 2) {
+ interpolate(srcPtr, srcStride[0], dstPtr, dstStride[0], c->srcW);
+ srcPtr += 2 * srcStride[0];
+ dstPtr += 2 * dstStride[0];
+ }
+
+ copy(srcPtr, srcStride[0], dstPtr, dstStride[0], c->srcW);
+ return srcSliceH;
+}
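+/* The demosaic kernels need neighboring rows for context, so the first and
+ * last 2-row pairs of the slice use the cheap "copy" kernel and only the
+ * interior pairs are interpolated; rows are always stepped in pairs because
+ * the Bayer pattern repeats every 2 lines.  This appears to presume an even
+ * srcSliceH of at least 4. */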
+
+static int bayer_to_yv12_wrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
+ int srcSliceH, uint8_t* dst[], int dstStride[])
+{
+ const uint8_t *srcPtr= src[0];
+ uint8_t *dstY= dst[0];
+ uint8_t *dstU= dst[1];
+ uint8_t *dstV= dst[2];
+ int i;
+ void (*copy) (const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv);
+ void (*interpolate)(const uint8_t *src, int src_stride, uint8_t *dstY, uint8_t *dstU, uint8_t *dstV, int luma_stride, int width, int32_t *rgb2yuv);
+
+ switch(c->srcFormat) {
+#define CASE(pixfmt, prefix) \
+ case pixfmt: copy = bayer_##prefix##_to_yv12_copy; \
+ interpolate = bayer_##prefix##_to_yv12_interpolate; \
+ break;
+ CASE(AV_PIX_FMT_BAYER_BGGR8, bggr8)
+ CASE(AV_PIX_FMT_BAYER_BGGR16LE, bggr16le)
+ CASE(AV_PIX_FMT_BAYER_BGGR16BE, bggr16be)
+ CASE(AV_PIX_FMT_BAYER_RGGB8, rggb8)
+ CASE(AV_PIX_FMT_BAYER_RGGB16LE, rggb16le)
+ CASE(AV_PIX_FMT_BAYER_RGGB16BE, rggb16be)
+ CASE(AV_PIX_FMT_BAYER_GBRG8, gbrg8)
+ CASE(AV_PIX_FMT_BAYER_GBRG16LE, gbrg16le)
+ CASE(AV_PIX_FMT_BAYER_GBRG16BE, gbrg16be)
+ CASE(AV_PIX_FMT_BAYER_GRBG8, grbg8)
+ CASE(AV_PIX_FMT_BAYER_GRBG16LE, grbg16le)
+ CASE(AV_PIX_FMT_BAYER_GRBG16BE, grbg16be)
+#undef CASE
+ default: return 0;
+ }
+
+ copy(srcPtr, srcStride[0], dstY, dstU, dstV, dstStride[0], c->srcW, c->input_rgb2yuv_table);
+ srcPtr += 2 * srcStride[0];
+ dstY += 2 * dstStride[0];
+ dstU += dstStride[1];
+ dstV += dstStride[1];
+
+ for (i = 2; i < srcSliceH - 2; i += 2) {
+ interpolate(srcPtr, srcStride[0], dstY, dstU, dstV, dstStride[0], c->srcW, c->input_rgb2yuv_table);
+ srcPtr += 2 * srcStride[0];
+ dstY += 2 * dstStride[0];
+ dstU += dstStride[1];
+ dstV += dstStride[1];
+ }
+
+ copy(srcPtr, srcStride[0], dstY, dstU, dstV, dstStride[0], c->srcW, c->input_rgb2yuv_table);
+ return srcSliceH;
+}
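+/* Same border scheme as the RGB24 wrapper above; note that each 2-row luma
+ * step advances dstU/dstV by a single row, matching the 4:2:0 chroma
+ * subsampling of the YV12 target (both chroma pointers share
+ * dstStride[1]). */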
+
#define isRGBA32(x) ( \
(x) == AV_PIX_FMT_ARGB \
|| (x) == AV_PIX_FMT_RGBA \
@@ -569,6 +1129,20 @@ static int rgbToPlanarRgbWrapper(SwsContext *c, const uint8_t *src[],
|| (x) == AV_PIX_FMT_ABGR \
)
+#define isRGBA64(x) ( \
+ (x) == AV_PIX_FMT_RGBA64LE \
+ || (x) == AV_PIX_FMT_RGBA64BE \
+ || (x) == AV_PIX_FMT_BGRA64LE \
+ || (x) == AV_PIX_FMT_BGRA64BE \
+ )
+
+#define isRGB48(x) ( \
+ (x) == AV_PIX_FMT_RGB48LE \
+ || (x) == AV_PIX_FMT_RGB48BE \
+ || (x) == AV_PIX_FMT_BGR48LE \
+ || (x) == AV_PIX_FMT_BGR48BE \
+ )
+
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
typedef void (* rgbConvFn) (const uint8_t *, uint8_t *, int);
static rgbConvFn findRgbConvFn(SwsContext *c)
@@ -578,17 +1152,11 @@ static rgbConvFn findRgbConvFn(SwsContext *c)
const int srcId = c->srcFormatBpp;
const int dstId = c->dstFormatBpp;
rgbConvFn conv = NULL;
- const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(srcFormat);
- const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(dstFormat);
#define IS_NOT_NE(bpp, desc) \
(((bpp + 7) >> 3) == 2 && \
(!(desc->flags & AV_PIX_FMT_FLAG_BE) != !HAVE_BIGENDIAN))
- /* if this is non-native rgb444/555/565, don't handle it here. */
- if (IS_NOT_NE(srcId, desc_src) || IS_NOT_NE(dstId, desc_dst))
- return NULL;
-
#define CONV_IS(src, dst) (srcFormat == AV_PIX_FMT_##src && dstFormat == AV_PIX_FMT_##dst)
if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) {
@@ -604,6 +1172,32 @@ static rgbConvFn findRgbConvFn(SwsContext *c)
|| CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103;
else if (CONV_IS(BGRA, ABGR)
|| CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012;
+ } else if (isRGB48(srcFormat) && isRGB48(dstFormat)) {
+ if (CONV_IS(RGB48LE, BGR48LE)
+ || CONV_IS(BGR48LE, RGB48LE)
+ || CONV_IS(RGB48BE, BGR48BE)
+ || CONV_IS(BGR48BE, RGB48BE)) conv = rgb48tobgr48_nobswap;
+ else if (CONV_IS(RGB48LE, BGR48BE)
+ || CONV_IS(BGR48LE, RGB48BE)
+ || CONV_IS(RGB48BE, BGR48LE)
+ || CONV_IS(BGR48BE, RGB48LE)) conv = rgb48tobgr48_bswap;
+ } else if (isRGBA64(srcFormat) && isRGB48(dstFormat)) {
+ if (CONV_IS(RGBA64LE, BGR48LE)
+ || CONV_IS(BGRA64LE, RGB48LE)
+ || CONV_IS(RGBA64BE, BGR48BE)
+ || CONV_IS(BGRA64BE, RGB48BE)) conv = rgb64tobgr48_nobswap;
+ else if (CONV_IS(RGBA64LE, BGR48BE)
+ || CONV_IS(BGRA64LE, RGB48BE)
+ || CONV_IS(RGBA64BE, BGR48LE)
+ || CONV_IS(BGRA64BE, RGB48LE)) conv = rgb64tobgr48_bswap;
+ else if (CONV_IS(RGBA64LE, RGB48LE)
+ || CONV_IS(BGRA64LE, BGR48LE)
+ || CONV_IS(RGBA64BE, RGB48BE)
+ || CONV_IS(BGRA64BE, BGR48BE)) conv = rgb64to48_nobswap;
+ else if (CONV_IS(RGBA64LE, RGB48BE)
+ || CONV_IS(BGRA64LE, BGR48BE)
+ || CONV_IS(RGBA64BE, RGB48LE)
+ || CONV_IS(BGRA64BE, BGR48LE)) conv = rgb64to48_bswap;
} else
/* BGR -> BGR */
if ((isBGRinInt(srcFormat) && isBGRinInt(dstFormat)) ||
@@ -645,6 +1239,9 @@ static rgbConvFn findRgbConvFn(SwsContext *c)
}
}
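+    /* rgbToRgbWrapper below writes RGB32_1/BGR32_1 by filling the leading
+     * alpha byte and advancing dstPtr by ALT32_CORR, and asserts
+     * ALT32_CORR == 1 while doing so; builds where ALT32_CORR is negative
+     * cannot take that path, hence the refusal here. */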
+ if ((dstFormat == AV_PIX_FMT_RGB32_1 || dstFormat == AV_PIX_FMT_BGR32_1) && !isRGBA32(srcFormat) && ALT32_CORR<0)
+ return NULL;
+
return conv;
}
@@ -656,34 +1253,52 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
{
const enum AVPixelFormat srcFormat = c->srcFormat;
const enum AVPixelFormat dstFormat = c->dstFormat;
+ const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat);
+ const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat);
const int srcBpp = (c->srcFormatBpp + 7) >> 3;
const int dstBpp = (c->dstFormatBpp + 7) >> 3;
rgbConvFn conv = findRgbConvFn(c);
if (!conv) {
av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
- sws_format_name(srcFormat), sws_format_name(dstFormat));
+ av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
} else {
const uint8_t *srcPtr = src[0];
uint8_t *dstPtr = dst[0];
+ int src_bswap = IS_NOT_NE(c->srcFormatBpp, desc_src);
+ int dst_bswap = IS_NOT_NE(c->dstFormatBpp, desc_dst);
+
if ((srcFormat == AV_PIX_FMT_RGB32_1 || srcFormat == AV_PIX_FMT_BGR32_1) &&
!isRGBA32(dstFormat))
srcPtr += ALT32_CORR;
if ((dstFormat == AV_PIX_FMT_RGB32_1 || dstFormat == AV_PIX_FMT_BGR32_1) &&
- !isRGBA32(srcFormat))
+ !isRGBA32(srcFormat)) {
+ int i;
+ av_assert0(ALT32_CORR == 1);
+ for (i = 0; i < srcSliceH; i++)
+ dstPtr[dstStride[0] * (srcSliceY + i)] = 255;
dstPtr += ALT32_CORR;
+ }
if (dstStride[0] * srcBpp == srcStride[0] * dstBpp && srcStride[0] > 0 &&
- !(srcStride[0] % srcBpp))
+ !(srcStride[0] % srcBpp) && !dst_bswap && !src_bswap)
conv(srcPtr, dstPtr + dstStride[0] * srcSliceY,
(srcSliceH - 1) * srcStride[0] + c->srcW * srcBpp);
else {
- int i;
+ int i, j;
dstPtr += dstStride[0] * srcSliceY;
for (i = 0; i < srcSliceH; i++) {
- conv(srcPtr, dstPtr, c->srcW * srcBpp);
+ if(src_bswap) {
+ for(j=0; j<c->srcW; j++)
+ ((uint16_t*)c->formatConvBuffer)[j] = av_bswap16(((uint16_t*)srcPtr)[j]);
+ conv(c->formatConvBuffer, dstPtr, c->srcW * srcBpp);
+ }else
+ conv(srcPtr, dstPtr, c->srcW * srcBpp);
+ if(dst_bswap)
+ for(j=0; j<c->srcW; j++)
+ ((uint16_t*)dstPtr)[j] = av_bswap16(((uint16_t*)dstPtr)[j]);
srcPtr += srcStride[0];
dstPtr += dstStride[0];
}
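+    /* For non-native-endian 2-byte packed formats (IS_NOT_NE above), the
+     * source row is byte-swapped into c->formatConvBuffer before conv() and
+     * the output row is swapped in place afterwards, so every conv() kernel
+     * only ever sees native-endian components; the single-call fast path is
+     * restricted to the no-swap case for the same reason. */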
@@ -696,13 +1311,14 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
int srcStride[], int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- rgb24toyv12(
+ ff_rgb24toyv12(
src[0],
dst[0] + srcSliceY * dstStride[0],
dst[1] + (srcSliceY >> 1) * dstStride[1],
dst[2] + (srcSliceY >> 1) * dstStride[2],
c->srcW, srcSliceH,
- dstStride[0], dstStride[1], srcStride[0]);
+ dstStride[0], dstStride[1], srcStride[0],
+ c->input_rgb2yuv_table);
if (dst[3])
fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
return srcSliceH;
@@ -741,7 +1357,7 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
while (length + c->srcW <= FFABS(dstStride[0]) &&
length + c->srcW <= FFABS(srcStride[0]))
length += c->srcW;
- assert(length != 0);
+ av_assert1(length != 0);
for (i = 0; i < srcSliceH; i++) {
memcpy(dstPtr, srcPtr, length);
@@ -752,25 +1368,25 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
return srcSliceH;
}
-#define clip9(x) av_clip_uintp2(x, 9)
-#define clip10(x) av_clip_uintp2(x, 10)
-#define DITHER_COPY(dst, dstStride, wfunc, src, srcStride, rfunc, dithers, shift, clip) \
- for (i = 0; i < height; i++) { \
- const uint8_t *dither = dithers[i & 7]; \
- for (j = 0; j < length - 7; j += 8) { \
- wfunc(&dst[j + 0], clip((rfunc(&src[j + 0]) + dither[0]) >> shift)); \
- wfunc(&dst[j + 1], clip((rfunc(&src[j + 1]) + dither[1]) >> shift)); \
- wfunc(&dst[j + 2], clip((rfunc(&src[j + 2]) + dither[2]) >> shift)); \
- wfunc(&dst[j + 3], clip((rfunc(&src[j + 3]) + dither[3]) >> shift)); \
- wfunc(&dst[j + 4], clip((rfunc(&src[j + 4]) + dither[4]) >> shift)); \
- wfunc(&dst[j + 5], clip((rfunc(&src[j + 5]) + dither[5]) >> shift)); \
- wfunc(&dst[j + 6], clip((rfunc(&src[j + 6]) + dither[6]) >> shift)); \
- wfunc(&dst[j + 7], clip((rfunc(&src[j + 7]) + dither[7]) >> shift)); \
- } \
- for (; j < length; j++) \
- wfunc(&dst[j], (rfunc(&src[j]) + dither[j & 7]) >> shift); \
- dst += dstStride; \
- src += srcStride; \
+#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
+ uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
+ int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[src_depth-9][i&7];\
+ for (j = 0; j < length-7; j+=8){\
+ dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
+ dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
+ dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
+ dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
+ dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
+ dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
+ dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
+ dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+ }\
+ for (; j < length; j++)\
+ dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
+ dst += dstStride;\
+ src += srcStride;\
}
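+/* The rewritten macro computes dst = ((bswap(src) + dither) * scale) >> shift,
+ * a fixed-point multiply approximating (2^dst_depth - 1) / (2^src_depth - 1),
+ * so full-scale input still maps to full-scale output instead of being
+ * truncated by the plain shift of the old version.  scale and shift come
+ * from the dither_scale table (defined elsewhere in swscale), and the
+ * bswap/dbswap parameters let one body serve all four endianness
+ * combinations. */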
static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
@@ -781,174 +1397,129 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat);
int plane, i, j;
for (plane = 0; plane < 4; plane++) {
- int length = (plane == 0 || plane == 3) ? c->srcW : -((-c->srcW ) >> c->chrDstHSubSample);
- int y = (plane == 0 || plane == 3) ? srcSliceY: -((-srcSliceY) >> c->chrDstVSubSample);
- int height = (plane == 0 || plane == 3) ? srcSliceH: -((-srcSliceH) >> c->chrDstVSubSample);
+ int length = (plane == 0 || plane == 3) ? c->srcW : FF_CEIL_RSHIFT(c->srcW, c->chrDstHSubSample);
+ int y = (plane == 0 || plane == 3) ? srcSliceY: FF_CEIL_RSHIFT(srcSliceY, c->chrDstVSubSample);
+ int height = (plane == 0 || plane == 3) ? srcSliceH: FF_CEIL_RSHIFT(srcSliceH, c->chrDstVSubSample);
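+        /* FF_CEIL_RSHIFT(v, s) is the ceiling right shift previously spelled
+         * -((-v) >> s): both round the subsampled width/height up rather
+         * than down. */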
const uint8_t *srcPtr = src[plane];
uint8_t *dstPtr = dst[plane] + dstStride[plane] * y;
+ int shiftonly= plane==1 || plane==2 || (!c->srcRange && plane==0);
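+        /* Chroma planes, and luma in limited-range YUV, are widened by a
+         * plain left shift (e.g. 8-bit 16..235 -> 10-bit 64..940, as the
+         * specs define it); full-range planes instead replicate their top
+         * bits so that the maximum value still maps to the maximum. */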
if (!dst[plane])
continue;
// ignore palette for GRAY8
if (plane == 1 && !dst[2]) continue;
if (!src[plane] || (plane == 1 && !src[2])) {
- int val = (plane == 3) ? 255 : 128;
- if (is16BPS(c->dstFormat))
- length *= 2;
- if (is9_OR_10BPS(c->dstFormat)) {
- fill_plane9or10(dst[plane], dstStride[plane],
- length, height, y, val,
- desc_dst->comp[plane].depth_minus1 + 1,
- isBE(c->dstFormat));
- } else
+ if (is16BPS(c->dstFormat) || isNBPS(c->dstFormat)) {
+ fillPlane16(dst[plane], dstStride[plane], length, height, y,
+ plane == 3, desc_dst->comp[plane].depth_minus1,
+ isBE(c->dstFormat));
+ } else {
fillPlane(dst[plane], dstStride[plane], length, height, y,
- val);
+ (plane == 3) ? 255 : 128);
+ }
} else {
- if (is9_OR_10BPS(c->srcFormat)) {
+ if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat)
+ || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat))
+ ) {
const int src_depth = desc_src->comp[plane].depth_minus1 + 1;
const int dst_depth = desc_dst->comp[plane].depth_minus1 + 1;
const uint16_t *srcPtr2 = (const uint16_t *) srcPtr;
+ uint16_t *dstPtr2 = (uint16_t*)dstPtr;
- if (is16BPS(c->dstFormat)) {
- uint16_t *dstPtr2 = (uint16_t *) dstPtr;
-#define COPY9_OR_10TO16(rfunc, wfunc) \
- for (i = 0; i < height; i++) { \
- for (j = 0; j < length; j++) { \
- int srcpx = rfunc(&srcPtr2[j]); \
- wfunc(&dstPtr2[j], (srcpx << (16 - src_depth)) | (srcpx >> (2 * src_depth - 16))); \
- } \
- dstPtr2 += dstStride[plane] / 2; \
- srcPtr2 += srcStride[plane] / 2; \
+ if (dst_depth == 8) {
+ if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
+ DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , )
+ } else {
+ DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, )
}
- if (isBE(c->dstFormat)) {
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO16(AV_RB16, AV_WB16);
- } else {
- COPY9_OR_10TO16(AV_RL16, AV_WB16);
+ } else if (src_depth == 8) {
+ for (i = 0; i < height; i++) {
+ #define COPY816(w)\
+ if(shiftonly){\
+ for (j = 0; j < length; j++)\
+ w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\
+ }else{\
+ for (j = 0; j < length; j++)\
+ w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\
+ (srcPtr[j]>>(2*8-dst_depth)));\
}
- } else {
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO16(AV_RB16, AV_WL16);
+ if(isBE(c->dstFormat)){
+ COPY816(AV_WB16)
} else {
- COPY9_OR_10TO16(AV_RL16, AV_WL16);
+ COPY816(AV_WL16)
}
+ dstPtr2 += dstStride[plane]/2;
+ srcPtr += srcStride[plane];
}
- } else if (is9_OR_10BPS(c->dstFormat)) {
- uint16_t *dstPtr2 = (uint16_t *) dstPtr;
-#define COPY9_OR_10TO9_OR_10(loop) \
- for (i = 0; i < height; i++) { \
- for (j = 0; j < length; j++) { \
- loop; \
- } \
- dstPtr2 += dstStride[plane] / 2; \
- srcPtr2 += srcStride[plane] / 2; \
- }
-#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \
- if (dst_depth > src_depth) { \
- COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \
- wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \
- } else if (dst_depth < src_depth) { \
- DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_1, 1, clip9); \
- } else { \
- COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \
- }
- if (isBE(c->dstFormat)) {
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16);
- } else {
- COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16);
+ } else if (src_depth <= dst_depth) {
+ for (i = 0; i < height; i++) {
+ j = 0;
+ if(isBE(c->srcFormat) == HAVE_BIGENDIAN &&
+ isBE(c->dstFormat) == HAVE_BIGENDIAN &&
+ shiftonly) {
+ unsigned shift = dst_depth - src_depth;
+#if HAVE_FAST_64BIT
+#define FAST_COPY_UP(shift) \
+ for (; j < length - 3; j += 4) { \
+ uint64_t v = AV_RN64A(srcPtr2 + j); \
+ AV_WN64A(dstPtr2 + j, v << shift); \
+ }
+#else
+#define FAST_COPY_UP(shift) \
+ for (; j < length - 1; j += 2) { \
+ uint32_t v = AV_RN32A(srcPtr2 + j); \
+ AV_WN32A(dstPtr2 + j, v << shift); \
+ }
+#endif
+ switch (shift)
+ {
+ case 6: FAST_COPY_UP(6); break;
+ case 7: FAST_COPY_UP(7); break;
+ }
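+                    /* Shifting whole 32/64-bit words is safe here: each
+                     * 16-bit lane holds a src_depth-bit value and
+                     * src_depth + shift == dst_depth <= 16, so no bits spill
+                     * into the neighboring lane.  Only the common 10->16
+                     * (shift 6) and 9->16 (shift 7) promotions are
+                     * special-cased; other shifts leave j at 0 and fall
+                     * through to the scalar COPY_UP loop below. */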
}
- } else {
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16);
+#define COPY_UP(r,w) \
+ if(shiftonly){\
+ for (; j < length; j++){ \
+ unsigned int v= r(&srcPtr2[j]);\
+ w(&dstPtr2[j], v<<(dst_depth-src_depth));\
+ }\
+ }else{\
+ for (; j < length; j++){ \
+ unsigned int v= r(&srcPtr2[j]);\
+ w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \
+ (v>>(2*src_depth-dst_depth)));\
+ }\
+ }
+ if(isBE(c->srcFormat)){
+ if(isBE(c->dstFormat)){
+ COPY_UP(AV_RB16, AV_WB16)
+ } else {
+ COPY_UP(AV_RB16, AV_WL16)
+ }
} else {
- COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16);
+ if(isBE(c->dstFormat)){
+ COPY_UP(AV_RL16, AV_WB16)
+ } else {
+ COPY_UP(AV_RL16, AV_WL16)
+ }
}
+ dstPtr2 += dstStride[plane]/2;
+ srcPtr2 += srcStride[plane]/2;
}
} else {
-#define W8(a, b) { *(a) = (b); }
-#define COPY9_OR_10TO8(rfunc) \
- if (src_depth == 9) { \
- DITHER_COPY(dstPtr, dstStride[plane], W8, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_1, 1, av_clip_uint8); \
- } else { \
- DITHER_COPY(dstPtr, dstStride[plane], W8, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_3, 2, av_clip_uint8); \
- }
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO8(AV_RB16);
- } else {
- COPY9_OR_10TO8(AV_RL16);
- }
- }
- } else if (is9_OR_10BPS(c->dstFormat)) {
- const int dst_depth = desc_dst->comp[plane].depth_minus1 + 1;
- uint16_t *dstPtr2 = (uint16_t *) dstPtr;
-
- if (is16BPS(c->srcFormat)) {
- const uint16_t *srcPtr2 = (const uint16_t *) srcPtr;
-#define COPY16TO9_OR_10(rfunc, wfunc) \
- if (dst_depth == 9) { \
- DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- ff_dither_8x8_128, 7, clip9); \
- } else { \
- DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_64, 6, clip10); \
- }
- if (isBE(c->dstFormat)) {
- if (isBE(c->srcFormat)) {
- COPY16TO9_OR_10(AV_RB16, AV_WB16);
+ if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
+ if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
+ DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
} else {
- COPY16TO9_OR_10(AV_RL16, AV_WB16);
+ DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
}
- } else {
- if (isBE(c->srcFormat)) {
- COPY16TO9_OR_10(AV_RB16, AV_WL16);
+ }else{
+ if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
+ DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
} else {
- COPY16TO9_OR_10(AV_RL16, AV_WL16);
+ DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
}
}
- } else /* 8bit */ {
-#define COPY8TO9_OR_10(wfunc) \
- for (i = 0; i < height; i++) { \
- for (j = 0; j < length; j++) { \
- const int srcpx = srcPtr[j]; \
- wfunc(&dstPtr2[j], (srcpx << (dst_depth - 8)) | (srcpx >> (16 - dst_depth))); \
- } \
- dstPtr2 += dstStride[plane] / 2; \
- srcPtr += srcStride[plane]; \
- }
- if (isBE(c->dstFormat)) {
- COPY8TO9_OR_10(AV_WB16);
- } else {
- COPY8TO9_OR_10(AV_WL16);
- }
- }
- } else if (is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
- const uint16_t *srcPtr2 = (const uint16_t *) srcPtr;
-#define COPY16TO8(rfunc) \
- DITHER_COPY(dstPtr, dstStride[plane], W8, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_256, 8, av_clip_uint8);
- if (isBE(c->srcFormat)) {
- COPY16TO8(AV_RB16);
- } else {
- COPY16TO8(AV_RL16);
- }
- } else if (!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
- for (i = 0; i < height; i++) {
- for (j = 0; j < length; j++) {
- dstPtr[ j << 1 ] = srcPtr[j];
- dstPtr[(j << 1) + 1] = srcPtr[j];
- }
- srcPtr += srcStride[plane];
- dstPtr += dstStride[plane];
}
} else if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat) &&
isBE(c->srcFormat) != isBE(c->dstFormat)) {
@@ -1010,11 +1581,11 @@ void ff_get_unscaled_swscale(SwsContext *c)
/* yuv2bgr */
if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P ||
srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) &&
- !(flags & SWS_ACCURATE_RND) && !(dstH & 1)) {
+ !(flags & SWS_ACCURATE_RND) && (c->dither == SWS_DITHER_BAYER || c->dither == SWS_DITHER_AUTO) && !(dstH & 1)) {
c->swscale = ff_yuv2rgb_get_func_ptr(c);
}
- if (srcFormat == AV_PIX_FMT_YUV410P &&
+ if (srcFormat == AV_PIX_FMT_YUV410P && !(dstH & 3) &&
(dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
!(flags & SWS_BITEXACT)) {
c->swscale = yvu9ToYv12Wrapper;
@@ -1031,6 +1602,10 @@ void ff_get_unscaled_swscale(SwsContext *c)
&& (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
c->swscale = rgbToRgbWrapper;
+ if ((srcFormat == AV_PIX_FMT_GBRP && dstFormat == AV_PIX_FMT_GBRAP) ||
+ (srcFormat == AV_PIX_FMT_GBRAP && dstFormat == AV_PIX_FMT_GBRP))
+ c->swscale = planarRgbToplanarRgbWrapper;
+
#define isByteRGB(f) ( \
f == AV_PIX_FMT_RGB32 || \
f == AV_PIX_FMT_RGB32_1 || \
@@ -1042,33 +1617,89 @@ void ff_get_unscaled_swscale(SwsContext *c)
if (srcFormat == AV_PIX_FMT_GBRP && isPlanar(srcFormat) && isByteRGB(dstFormat))
c->swscale = planarRgbToRgbWrapper;
+ if ((srcFormat == AV_PIX_FMT_RGB48LE || srcFormat == AV_PIX_FMT_RGB48BE ||
+ srcFormat == AV_PIX_FMT_BGR48LE || srcFormat == AV_PIX_FMT_BGR48BE ||
+ srcFormat == AV_PIX_FMT_RGBA64LE || srcFormat == AV_PIX_FMT_RGBA64BE ||
+ srcFormat == AV_PIX_FMT_BGRA64LE || srcFormat == AV_PIX_FMT_BGRA64BE) &&
+ (dstFormat == AV_PIX_FMT_GBRP9LE || dstFormat == AV_PIX_FMT_GBRP9BE ||
+ dstFormat == AV_PIX_FMT_GBRP10LE || dstFormat == AV_PIX_FMT_GBRP10BE ||
+ dstFormat == AV_PIX_FMT_GBRP12LE || dstFormat == AV_PIX_FMT_GBRP12BE ||
+ dstFormat == AV_PIX_FMT_GBRP14LE || dstFormat == AV_PIX_FMT_GBRP14BE ||
+ dstFormat == AV_PIX_FMT_GBRP16LE || dstFormat == AV_PIX_FMT_GBRP16BE ||
+ dstFormat == AV_PIX_FMT_GBRAP16LE || dstFormat == AV_PIX_FMT_GBRAP16BE ))
+ c->swscale = Rgb16ToPlanarRgb16Wrapper;
+
+ if ((srcFormat == AV_PIX_FMT_GBRP9LE || srcFormat == AV_PIX_FMT_GBRP9BE ||
+ srcFormat == AV_PIX_FMT_GBRP16LE || srcFormat == AV_PIX_FMT_GBRP16BE ||
+ srcFormat == AV_PIX_FMT_GBRP10LE || srcFormat == AV_PIX_FMT_GBRP10BE ||
+ srcFormat == AV_PIX_FMT_GBRP12LE || srcFormat == AV_PIX_FMT_GBRP12BE ||
+ srcFormat == AV_PIX_FMT_GBRP14LE || srcFormat == AV_PIX_FMT_GBRP14BE ||
+ srcFormat == AV_PIX_FMT_GBRAP16LE || srcFormat == AV_PIX_FMT_GBRAP16BE) &&
+ (dstFormat == AV_PIX_FMT_RGB48LE || dstFormat == AV_PIX_FMT_RGB48BE ||
+ dstFormat == AV_PIX_FMT_BGR48LE || dstFormat == AV_PIX_FMT_BGR48BE ||
+ dstFormat == AV_PIX_FMT_RGBA64LE || dstFormat == AV_PIX_FMT_RGBA64BE ||
+ dstFormat == AV_PIX_FMT_BGRA64LE || dstFormat == AV_PIX_FMT_BGRA64BE))
+ c->swscale = planarRgb16ToRgb16Wrapper;
+
if (av_pix_fmt_desc_get(srcFormat)->comp[0].depth_minus1 == 7 &&
isPackedRGB(srcFormat) && dstFormat == AV_PIX_FMT_GBRP)
c->swscale = rgbToPlanarRgbWrapper;
+ if (isBayer(srcFormat)) {
+ if (dstFormat == AV_PIX_FMT_RGB24)
+ c->swscale = bayer_to_rgb24_wrapper;
+ else if (dstFormat == AV_PIX_FMT_YUV420P)
+ c->swscale = bayer_to_yv12_wrapper;
+ else if (!isBayer(dstFormat)) {
+ av_log(c, AV_LOG_ERROR, "unsupported bayer conversion\n");
+ av_assert0(0);
+ }
+ }
+
/* bswap 16 bits per pixel/component packed formats */
- if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR444) ||
+ if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_BGGR16) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_RGGB16) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_GBRG16) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BAYER_GRBG16) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR444) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR48) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR555) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGR565) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGRA64) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY16) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YA16) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP9) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP10) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP12) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP14) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP16) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRAP16) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB444) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB48) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB555) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGB565) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_RGBA64) ||
- IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_XYZ12))
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_XYZ12) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P9) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P10) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P12) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P14) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV420P16) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P9) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P10) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P12) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P14) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV422P16) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P9) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P10) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P12) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P14) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YUV444P16))
c->swscale = packed_16bpc_bswap;
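+    /* packed_16bpc_bswap just byte-swaps every 16-bit word in the plane(s),
+     * so any pair of formats differing only in component endianness can
+     * share it -- hence the list growing to also cover Bayer16, GBRP9..16,
+     * GBRAP16 and the 9..16-bit planar YUV formats. */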
- if ((usePal(srcFormat) && (
- dstFormat == AV_PIX_FMT_RGB32 ||
- dstFormat == AV_PIX_FMT_RGB32_1 ||
- dstFormat == AV_PIX_FMT_RGB24 ||
- dstFormat == AV_PIX_FMT_BGR32 ||
- dstFormat == AV_PIX_FMT_BGR32_1 ||
- dstFormat == AV_PIX_FMT_BGR24)))
+ if (usePal(srcFormat) && isByteRGB(dstFormat))
c->swscale = palToRgbWrapper;
if (srcFormat == AV_PIX_FMT_YUV422P) {
@@ -1099,13 +1730,14 @@ void ff_get_unscaled_swscale(SwsContext *c)
if (srcFormat == AV_PIX_FMT_UYVY422 && dstFormat == AV_PIX_FMT_YUV422P)
c->swscale = uyvyToYuv422Wrapper;
+#define isPlanarGray(x) (isGray(x) && (x) != AV_PIX_FMT_YA8 && (x) != AV_PIX_FMT_YA16LE && (x) != AV_PIX_FMT_YA16BE)
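+/* YA8/YA16 interleave an alpha byte/word with each luma sample, so although
+ * isGray() matches them they must not take the plane-copy fast path below;
+ * isPlanarGray() excludes exactly those formats. */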
/* simple copy */
if ( srcFormat == dstFormat ||
(srcFormat == AV_PIX_FMT_YUVA420P && dstFormat == AV_PIX_FMT_YUV420P) ||
(srcFormat == AV_PIX_FMT_YUV420P && dstFormat == AV_PIX_FMT_YUVA420P) ||
- (isPlanarYUV(srcFormat) && isGray(dstFormat)) ||
- (isPlanarYUV(dstFormat) && isGray(srcFormat)) ||
- (isGray(dstFormat) && isGray(srcFormat)) ||
+ (isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) ||
+ (isPlanarYUV(dstFormat) && isPlanarGray(srcFormat)) ||
+ (isPlanarGray(dstFormat) && isPlanarGray(srcFormat)) ||
(isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) &&
c->chrDstHSubSample == c->chrSrcHSubSample &&
c->chrDstVSubSample == c->chrSrcVSubSample &&
@@ -1120,177 +1752,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
if (ARCH_PPC)
ff_get_unscaled_swscale_ppc(c);
-}
-
-static void reset_ptr(const uint8_t *src[], int format)
-{
- if (!isALPHA(format))
- src[3] = NULL;
- if (!isPlanar(format)) {
- src[3] = src[2] = NULL;
-
- if (!usePal(format))
- src[1] = NULL;
- }
-}
-
-static int check_image_pointers(uint8_t *data[4], enum AVPixelFormat pix_fmt,
- const int linesizes[4])
-{
- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
- int i;
-
- for (i = 0; i < 4; i++) {
- int plane = desc->comp[i].plane;
- if (!data[plane] || !linesizes[plane])
- return 0;
- }
-
- return 1;
-}
-
-/**
- * swscale wrapper, so we don't need to export the SwsContext.
- * Assumes planar YUV to be in YUV order instead of YVU.
- */
-int attribute_align_arg sws_scale(struct SwsContext *c,
- const uint8_t * const srcSlice[],
- const int srcStride[], int srcSliceY,
- int srcSliceH, uint8_t *const dst[],
- const int dstStride[])
-{
- int i;
- const uint8_t *src2[4] = { srcSlice[0], srcSlice[1], srcSlice[2], srcSlice[3] };
- uint8_t *dst2[4] = { dst[0], dst[1], dst[2], dst[3] };
-
- // do not mess up sliceDir if we have a "trailing" 0-size slice
- if (srcSliceH == 0)
- return 0;
-
- if (!check_image_pointers(srcSlice, c->srcFormat, srcStride)) {
- av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
- return 0;
- }
- if (!check_image_pointers(dst, c->dstFormat, dstStride)) {
- av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
- return 0;
- }
-
- if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
- av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
- return 0;
- }
- if (c->sliceDir == 0) {
- if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
- }
-
- if (usePal(c->srcFormat)) {
- for (i = 0; i < 256; i++) {
- int r, g, b, y, u, v;
- if (c->srcFormat == AV_PIX_FMT_PAL8) {
- uint32_t p = ((const uint32_t *)(srcSlice[1]))[i];
- r = (p >> 16) & 0xFF;
- g = (p >> 8) & 0xFF;
- b = p & 0xFF;
- } else if (c->srcFormat == AV_PIX_FMT_RGB8) {
- r = ( i >> 5 ) * 36;
- g = ((i >> 2) & 7) * 36;
- b = ( i & 3) * 85;
- } else if (c->srcFormat == AV_PIX_FMT_BGR8) {
- b = ( i >> 6 ) * 85;
- g = ((i >> 3) & 7) * 36;
- r = ( i & 7) * 36;
- } else if (c->srcFormat == AV_PIX_FMT_RGB4_BYTE) {
- r = ( i >> 3 ) * 255;
- g = ((i >> 1) & 3) * 85;
- b = ( i & 1) * 255;
- } else if (c->srcFormat == AV_PIX_FMT_GRAY8 ||
- c->srcFormat == AV_PIX_FMT_YA8) {
- r = g = b = i;
- } else {
- assert(c->srcFormat == AV_PIX_FMT_BGR4_BYTE);
- b = ( i >> 3 ) * 255;
- g = ((i >> 1) & 3) * 85;
- r = ( i & 1) * 255;
- }
- y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
- u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
- v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
- c->pal_yuv[i] = y + (u << 8) + (v << 16) + (0xFFU << 24);
-
- switch (c->dstFormat) {
- case AV_PIX_FMT_BGR32:
-#if !HAVE_BIGENDIAN
- case AV_PIX_FMT_RGB24:
-#endif
- c->pal_rgb[i] = r + (g << 8) + (b << 16) + (0xFFU << 24);
- break;
- case AV_PIX_FMT_BGR32_1:
-#if HAVE_BIGENDIAN
- case AV_PIX_FMT_BGR24:
-#endif
- c->pal_rgb[i] = 0xFF + (r << 8) + (g << 16) + ((unsigned)b << 24);
- break;
- case AV_PIX_FMT_RGB32_1:
-#if HAVE_BIGENDIAN
- case AV_PIX_FMT_RGB24:
-#endif
- c->pal_rgb[i] = 0xFF + (b << 8) + (g << 16) + ((unsigned)r << 24);
- break;
- case AV_PIX_FMT_RGB32:
-#if !HAVE_BIGENDIAN
- case AV_PIX_FMT_BGR24:
-#endif
- default:
- c->pal_rgb[i] = b + (g << 8) + (r << 16) + (0xFFU << 24);
- }
- }
- }
-
- // copy strides, so they can safely be modified
- if (c->sliceDir == 1) {
- // slices go from top to bottom
- int srcStride2[4] = { srcStride[0], srcStride[1], srcStride[2],
- srcStride[3] };
- int dstStride2[4] = { dstStride[0], dstStride[1], dstStride[2],
- dstStride[3] };
-
- reset_ptr(src2, c->srcFormat);
- reset_ptr((const uint8_t **) dst2, c->dstFormat);
-
- /* reset slice direction at end of frame */
- if (srcSliceY + srcSliceH == c->srcH)
- c->sliceDir = 0;
-
- return c->swscale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2,
- dstStride2);
- } else {
- // slices go from bottom to top => we flip the image internally
- int srcStride2[4] = { -srcStride[0], -srcStride[1], -srcStride[2],
- -srcStride[3] };
- int dstStride2[4] = { -dstStride[0], -dstStride[1], -dstStride[2],
- -dstStride[3] };
-
- src2[0] += (srcSliceH - 1) * srcStride[0];
- if (!usePal(c->srcFormat))
- src2[1] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[1];
- src2[2] += ((srcSliceH >> c->chrSrcVSubSample) - 1) * srcStride[2];
- src2[3] += (srcSliceH - 1) * srcStride[3];
- dst2[0] += ( c->dstH - 1) * dstStride[0];
- dst2[1] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[1];
- dst2[2] += ((c->dstH >> c->chrDstVSubSample) - 1) * dstStride[2];
- dst2[3] += ( c->dstH - 1) * dstStride[3];
-
- reset_ptr(src2, c->srcFormat);
- reset_ptr((const uint8_t **) dst2, c->dstFormat);
-
- /* reset slice direction at end of frame */
- if (!srcSliceY)
- c->sliceDir = 0;
-
- return c->swscale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH,
- srcSliceH, dst2, dstStride2);
- }
+// if (ARCH_ARM)
+// ff_get_unscaled_swscale_arm(c);
}
/* Convert the 8-bit paletted image to the same packed 32-bit format as the palette */
diff --git a/libswscale/swscaleres.rc b/libswscale/swscaleres.rc
new file mode 100644
index 0000000..5cb8ee7
--- /dev/null
+++ b/libswscale/swscaleres.rc
@@ -0,0 +1,55 @@
+/*
+ * Windows resource file for libswscale
+ *
+ * Copyright (C) 2012 James Almer
+ * Copyright (C) 2013 Tiancheng "Timothy" Gu
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <windows.h>
+#include "libswscale/version.h"
+#include "libavutil/ffversion.h"
+#include "config.h"
+
+1 VERSIONINFO
+FILEVERSION LIBSWSCALE_VERSION_MAJOR, LIBSWSCALE_VERSION_MINOR, LIBSWSCALE_VERSION_MICRO, 0
+PRODUCTVERSION LIBSWSCALE_VERSION_MAJOR, LIBSWSCALE_VERSION_MINOR, LIBSWSCALE_VERSION_MICRO, 0
+FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+FILEOS VOS_NT_WINDOWS32
+FILETYPE VFT_DLL
+{
+ BLOCK "StringFileInfo"
+ {
+ BLOCK "040904B0"
+ {
+ VALUE "CompanyName", "FFmpeg Project"
+ VALUE "FileDescription", "FFmpeg image rescaling library"
+ VALUE "FileVersion", AV_STRINGIFY(LIBSWSCALE_VERSION)
+ VALUE "InternalName", "libswscale"
+ VALUE "LegalCopyright", "Copyright (C) 2000-" AV_STRINGIFY(CONFIG_THIS_YEAR) " FFmpeg Project"
+ VALUE "OriginalFilename", "swscale" BUILDSUF "-" AV_STRINGIFY(LIBSWSCALE_VERSION_MAJOR) SLIBSUF
+ VALUE "ProductName", "FFmpeg"
+ VALUE "ProductVersion", FFMPEG_VERSION
+ }
+ }
+
+ BLOCK "VarFileInfo"
+ {
+ VALUE "Translation", 0x0409, 0x04B0
+ }
+}
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 32e304c..06fd358 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1,27 +1,27 @@
/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#define _SVID_SOURCE // needed for MAP_ANONYMOUS
-#include <assert.h>
+#define _DARWIN_C_SOURCE // needed for MAP_ANON
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
@@ -38,6 +38,7 @@
#endif
#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
@@ -52,24 +53,25 @@
#include "swscale.h"
#include "swscale_internal.h"
+static void handle_formats(SwsContext *c);
+
unsigned swscale_version(void)
{
+ av_assert0(LIBSWSCALE_VERSION_MICRO >= 100);
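+    /* FFmpeg's libraries use micro versions >= 100 to distinguish themselves
+     * from Libav builds of the same library, so this asserts that header and
+     * binary actually come from FFmpeg. */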
return LIBSWSCALE_VERSION_INT;
}
const char *swscale_configuration(void)
{
- return LIBAV_CONFIGURATION;
+ return FFMPEG_CONFIGURATION;
}
const char *swscale_license(void)
{
#define LICENSE_PREFIX "libswscale license: "
- return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
+ return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}
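+/* The concatenation keeps the full "libswscale license: ..." literal in the
+ * binary (handy for strings(1)), while the returned pointer skips
+ * sizeof(LICENSE_PREFIX) - 1 bytes so callers see only the license text. */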
-#define RET 0xC3 // near return opcode for x86
-
typedef struct FormatEntry {
uint8_t is_supported_in :1;
uint8_t is_supported_out :1;
@@ -90,6 +92,7 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
[AV_PIX_FMT_MONOBLACK] = { 1, 1 },
[AV_PIX_FMT_PAL8] = { 1, 0 },
[AV_PIX_FMT_YUVJ420P] = { 1, 1 },
+ [AV_PIX_FMT_YUVJ411P] = { 1, 1 },
[AV_PIX_FMT_YUVJ422P] = { 1, 1 },
[AV_PIX_FMT_YUVJ444P] = { 1, 1 },
[AV_PIX_FMT_YVYU422] = { 1, 1 },
@@ -107,6 +110,10 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
[AV_PIX_FMT_RGBA] = { 1, 1 },
[AV_PIX_FMT_ABGR] = { 1, 1 },
[AV_PIX_FMT_BGRA] = { 1, 1 },
+ [AV_PIX_FMT_0RGB] = { 1, 1 },
+ [AV_PIX_FMT_RGB0] = { 1, 1 },
+ [AV_PIX_FMT_0BGR] = { 1, 1 },
+ [AV_PIX_FMT_BGR0] = { 1, 1 },
[AV_PIX_FMT_GRAY16BE] = { 1, 1 },
[AV_PIX_FMT_GRAY16LE] = { 1, 1 },
[AV_PIX_FMT_YUV440P] = { 1, 1 },
@@ -134,8 +141,8 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
[AV_PIX_FMT_YUVA444P16LE]= { 1, 1 },
[AV_PIX_FMT_RGB48BE] = { 1, 1 },
[AV_PIX_FMT_RGB48LE] = { 1, 1 },
- [AV_PIX_FMT_RGBA64BE] = { 0, 0, 1 },
- [AV_PIX_FMT_RGBA64LE] = { 0, 0, 1 },
+ [AV_PIX_FMT_RGBA64BE] = { 1, 1, 1 },
+ [AV_PIX_FMT_RGBA64LE] = { 1, 1, 1 },
[AV_PIX_FMT_RGB565BE] = { 1, 1 },
[AV_PIX_FMT_RGB565LE] = { 1, 1 },
[AV_PIX_FMT_RGB555BE] = { 1, 1 },
@@ -159,29 +166,60 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
[AV_PIX_FMT_YA16LE] = { 1, 0 },
[AV_PIX_FMT_BGR48BE] = { 1, 1 },
[AV_PIX_FMT_BGR48LE] = { 1, 1 },
- [AV_PIX_FMT_BGRA64BE] = { 0, 0, 1 },
- [AV_PIX_FMT_BGRA64LE] = { 0, 0, 1 },
+ [AV_PIX_FMT_BGRA64BE] = { 1, 1, 1 },
+ [AV_PIX_FMT_BGRA64LE] = { 1, 1, 1 },
[AV_PIX_FMT_YUV420P9BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P9LE] = { 1, 1 },
[AV_PIX_FMT_YUV420P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV420P10LE] = { 1, 1 },
+ [AV_PIX_FMT_YUV420P12BE] = { 1, 1 },
+ [AV_PIX_FMT_YUV420P12LE] = { 1, 1 },
+ [AV_PIX_FMT_YUV420P14BE] = { 1, 1 },
+ [AV_PIX_FMT_YUV420P14LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P9BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P9LE] = { 1, 1 },
[AV_PIX_FMT_YUV422P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV422P10LE] = { 1, 1 },
+ [AV_PIX_FMT_YUV422P12BE] = { 1, 1 },
+ [AV_PIX_FMT_YUV422P12LE] = { 1, 1 },
+ [AV_PIX_FMT_YUV422P14BE] = { 1, 1 },
+ [AV_PIX_FMT_YUV422P14LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P9BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P9LE] = { 1, 1 },
[AV_PIX_FMT_YUV444P10BE] = { 1, 1 },
[AV_PIX_FMT_YUV444P10LE] = { 1, 1 },
+ [AV_PIX_FMT_YUV444P12BE] = { 1, 1 },
+ [AV_PIX_FMT_YUV444P12LE] = { 1, 1 },
+ [AV_PIX_FMT_YUV444P14BE] = { 1, 1 },
+ [AV_PIX_FMT_YUV444P14LE] = { 1, 1 },
[AV_PIX_FMT_GBRP] = { 1, 1 },
[AV_PIX_FMT_GBRP9LE] = { 1, 1 },
[AV_PIX_FMT_GBRP9BE] = { 1, 1 },
[AV_PIX_FMT_GBRP10LE] = { 1, 1 },
[AV_PIX_FMT_GBRP10BE] = { 1, 1 },
+ [AV_PIX_FMT_GBRP12LE] = { 1, 1 },
+ [AV_PIX_FMT_GBRP12BE] = { 1, 1 },
+ [AV_PIX_FMT_GBRP14LE] = { 1, 1 },
+ [AV_PIX_FMT_GBRP14BE] = { 1, 1 },
[AV_PIX_FMT_GBRP16LE] = { 1, 0 },
[AV_PIX_FMT_GBRP16BE] = { 1, 0 },
- [AV_PIX_FMT_XYZ12BE] = { 0, 0, 1 },
- [AV_PIX_FMT_XYZ12LE] = { 0, 0, 1 },
+ [AV_PIX_FMT_XYZ12BE] = { 1, 1, 1 },
+ [AV_PIX_FMT_XYZ12LE] = { 1, 1, 1 },
+ [AV_PIX_FMT_GBRAP] = { 1, 1 },
+ [AV_PIX_FMT_GBRAP16LE] = { 1, 0 },
+ [AV_PIX_FMT_GBRAP16BE] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_BGGR8] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_RGGB8] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_GBRG8] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_GRBG8] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_BGGR16LE] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_BGGR16BE] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_RGGB16LE] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_RGGB16BE] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_GBRG16LE] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_GBRG16BE] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_GRBG16LE] = { 1, 0 },
+ [AV_PIX_FMT_BAYER_GRBG16BE] = { 1, 0 },
};
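+/* The third initializer in entries such as { 1, 1, 1 } sets the FormatEntry
+ * is_supported_endianness bit queried by sws_isSupportedEndiannessConversion()
+ * below; RGBA64/BGRA64 and XYZ12 move here from bswap-only { 0, 0, 1 } to
+ * fully supported input and output. */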
int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
@@ -202,6 +240,7 @@ int sws_isSupportedEndiannessConversion(enum AVPixelFormat pix_fmt)
format_entries[pix_fmt].is_supported_endianness : 0;
}
+#if FF_API_SWS_FORMAT_NAME
const char *sws_format_name(enum AVPixelFormat format)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
@@ -210,6 +249,7 @@ const char *sws_format_name(enum AVPixelFormat format)
else
return "Unknown format";
}
+#endif
static double getSplineCoeff(double a, double b, double c, double d,
double dist)
@@ -224,12 +264,41 @@ static double getSplineCoeff(double a, double b, double c, double d,
dist - 1.0);
}
+static av_cold int get_local_pos(SwsContext *s, int chr_subsample, int pos, int dir)
+{
+ if (pos < 0) {
+ pos = (128 << chr_subsample) - 128;
+ }
+ pos += 128; // relative to ideal left edge
+ return pos >> chr_subsample;
+}
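+/* Chroma positions are expressed in 1/256-pixel units; a negative value
+ * selects the default.  For 4:2:0 (chr_subsample == 1) the default works
+ * out as pos = (128 << 1) - 128 = 128, then (128 + 128) >> 1 = 128, i.e.
+ * the chroma sample is treated as sitting half a luma pixel from the edge,
+ * the horizontally centered MPEG-1/JPEG siting.  (The dir parameter is
+ * unused in this version.) */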
+
+typedef struct {
+ int flag; ///< flag associated to the algorithm
+ const char *description; ///< human-readable description
+ int size_factor; ///< size factor used when initing the filters
+} ScaleAlgorithm;
+
+static const ScaleAlgorithm scale_algorithms[] = {
+ { SWS_AREA, "area averaging", 1 /* downscale only, for upscale it is bilinear */ },
+ { SWS_BICUBIC, "bicubic", 4 },
+ { SWS_BICUBLIN, "luma bicubic / chroma bilinear", -1 },
+ { SWS_BILINEAR, "bilinear", 2 },
+ { SWS_FAST_BILINEAR, "fast bilinear", -1 },
+ { SWS_GAUSS, "Gaussian", 8 /* infinite ;) */ },
+ { SWS_LANCZOS, "Lanczos", -1 /* custom */ },
+ { SWS_POINT, "nearest neighbor / point", -1 },
+ { SWS_SINC, "sinc", 20 /* infinite ;) */ },
+ { SWS_SPLINE, "bicubic spline", 20 /* infinite :) */ },
+ { SWS_X, "experimental", 8 },
+};
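+/* size_factor feeds the filter-width computation in initFilter() below:
+ * upscales get 1 + size_factor taps, downscales widen that by the scale
+ * ratio.  A -1 entry means the width is decided elsewhere: Lanczos derives
+ * it from param[0], while fast bilinear and point sampling take the
+ * dedicated branches earlier in initFilter(). */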
+
static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
int *outFilterSize, int xInc, int srcW,
int dstW, int filterAlign, int one,
int flags, int cpu_flags,
SwsVector *srcFilter, SwsVector *dstFilter,
- double param[2], int is_horizontal)
+ double param[2], int srcPos, int dstPos)
{
int i;
int filterSize;
@@ -237,19 +306,19 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
int minFilterSize;
int64_t *filter = NULL;
int64_t *filter2 = NULL;
- const int64_t fone = 1LL << 54;
+ const int64_t fone = 1LL << (54 - FFMIN(av_log2(srcW/dstW), 8));
int ret = -1;
emms_c(); // FIXME should not be required but IS (even for non-MMX versions)
// NOTE: the +3 is for the MMX(+1) / SSE(+3) scaler which reads over the end
- FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW + 3) * sizeof(**filterPos), fail);
+ FF_ALLOC_ARRAY_OR_GOTO(NULL, *filterPos, (dstW + 3), sizeof(**filterPos), fail);
- if (FFABS(xInc - 0x10000) < 10) { // unscaled
+ if (FFABS(xInc - 0x10000) < 10 && srcPos == dstPos) { // unscaled
int i;
filterSize = 1;
- FF_ALLOCZ_OR_GOTO(NULL, filter,
- dstW * sizeof(*filter) * filterSize, fail);
+ FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter,
+ dstW, sizeof(*filter) * filterSize, fail);
for (i = 0; i < dstW; i++) {
filter[i * filterSize] = fone;
@@ -257,12 +326,12 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
}
} else if (flags & SWS_POINT) { // lame looking point sampling mode
int i;
- int xDstInSrc;
+ int64_t xDstInSrc;
filterSize = 1;
- FF_ALLOC_OR_GOTO(NULL, filter,
- dstW * sizeof(*filter) * filterSize, fail);
+ FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
+ dstW, sizeof(*filter) * filterSize, fail);
- xDstInSrc = xInc / 2 - 0x8000;
+ xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
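+    /* srcPos/dstPos are in 1/256-pixel units and xInc is a 16.16 fixed-point
+     * source-pixels-per-destination-pixel step, so xDstInSrc stays in 16.16
+     * source pixels: (dstPos * xInc) >> 8 rescales 1/256 units to 16.16, and
+     * (srcPos * 0x8000) >> 7 == srcPos << 8 does the same for the source
+     * offset.  The old xInc / 2 - 0x8000 was the special case of both
+     * positions centered; the generic branch further down applies the same
+     * identity with one extra fractional bit (>> 7). */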
for (i = 0; i < dstW; i++) {
int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
@@ -273,12 +342,12 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
} else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
(flags & SWS_FAST_BILINEAR)) { // bilinear upscale
int i;
- int xDstInSrc;
+ int64_t xDstInSrc;
filterSize = 2;
- FF_ALLOC_OR_GOTO(NULL, filter,
- dstW * sizeof(*filter) * filterSize, fail);
+ FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
+ dstW, sizeof(*filter) * filterSize, fail);
- xDstInSrc = xInc / 2 - 0x8000;
+ xDstInSrc = ((dstPos*(int64_t)xInc)>>8) - ((srcPos*0x8000LL)>>7);
for (i = 0; i < dstW; i++) {
int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
int j;
@@ -286,8 +355,7 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
(*filterPos)[i] = xx;
// bilinear upscale / linear interpolate / area averaging
for (j = 0; j < filterSize; j++) {
- int64_t coeff = fone - FFABS((xx << 16) - xDstInSrc) *
- (fone >> 16);
+ int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
if (coeff < 0)
coeff = 0;
filter[i * filterSize + j] = coeff;
@@ -297,28 +365,17 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
}
} else {
int64_t xDstInSrc;
- int sizeFactor;
-
- if (flags & SWS_BICUBIC)
- sizeFactor = 4;
- else if (flags & SWS_X)
- sizeFactor = 8;
- else if (flags & SWS_AREA)
- sizeFactor = 1; // downscale only, for upscale it is bilinear
- else if (flags & SWS_GAUSS)
- sizeFactor = 8; // infinite ;)
- else if (flags & SWS_LANCZOS)
- sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
- else if (flags & SWS_SINC)
- sizeFactor = 20; // infinite ;)
- else if (flags & SWS_SPLINE)
- sizeFactor = 20; // infinite ;)
- else if (flags & SWS_BILINEAR)
- sizeFactor = 2;
- else {
- sizeFactor = 0; // GCC warning killer
- assert(0);
+ int sizeFactor = -1;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) {
+ if (flags & scale_algorithms[i].flag && scale_algorithms[i].size_factor > 0) {
+ sizeFactor = scale_algorithms[i].size_factor;
+ break;
+ }
}
+ if (flags & SWS_LANCZOS)
+ sizeFactor = param[0] != SWS_PARAM_DEFAULT ? ceil(2 * param[0]) : 6;
+ av_assert0(sizeFactor > 0);
if (xInc <= 1 << 16)
filterSize = 1 + sizeFactor; // upscale
@@ -328,10 +385,10 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
filterSize = FFMIN(filterSize, srcW - 2);
filterSize = FFMAX(filterSize, 1);
- FF_ALLOC_OR_GOTO(NULL, filter,
- dstW * sizeof(*filter) * filterSize, fail);
+ FF_ALLOC_ARRAY_OR_GOTO(NULL, filter,
+ dstW, sizeof(*filter) * filterSize, fail);
- xDstInSrc = xInc - 0x10000;
+ xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7);
for (i = 0; i < dstW; i++) {
int xx = (xDstInSrc - ((int64_t)(filterSize - 2) << 16)) / (1 << 17);
int j;
@@ -365,7 +422,7 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
(-12 * B - 48 * C) * d +
(8 * B + 24 * C) * (1 << 30);
}
- coeff *= fone >> (30 + 24);
+ coeff /= (1LL<<54)/fone;
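+                /* fone is no longer a constant 1LL << 54: it loses up to 8
+                 * bits for strong downscales (see its new definition above)
+                 * so the longer filters cannot overflow the 64-bit
+                 * accumulation.  The old "coeff *= fone >> (30 + 24)" would
+                 * multiply by zero in that case; dividing by
+                 * (1LL << 54) / fone stays exact. */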
}
#if 0
else if (flags & SWS_X) {
@@ -416,8 +473,7 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
double p = -2.196152422706632;
coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
} else {
- coeff = 0.0; // GCC warning killer
- assert(0);
+ av_assert0(0);
}
filter[i * filterSize + j] = coeff;
@@ -430,14 +486,14 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
/* apply src & dst Filter to filter -> filter2
* av_free(filter);
*/
- assert(filterSize > 0);
+ av_assert0(filterSize > 0);
filter2Size = filterSize;
if (srcFilter)
filter2Size += srcFilter->length - 1;
if (dstFilter)
filter2Size += dstFilter->length - 1;
- assert(filter2Size > 0);
- FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail);
+ av_assert0(filter2Size > 0);
+ FF_ALLOCZ_ARRAY_OR_GOTO(NULL, filter2, dstW, filter2Size * sizeof(*filter2), fail);
for (i = 0; i < dstW; i++) {
int j, k;
@@ -512,19 +568,24 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
filterAlign = 1;
}
- if (INLINE_MMX(cpu_flags)) {
+ if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
// special case for unscaled vertical filtering
if (minFilterSize == 1 && filterAlign == 2)
filterAlign = 1;
}
- assert(minFilterSize > 0);
+ av_assert0(minFilterSize > 0);
filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
- assert(filterSize > 0);
- filter = av_malloc(filterSize * dstW * sizeof(*filter));
+ av_assert0(filterSize > 0);
+ filter = av_malloc_array(dstW, filterSize * sizeof(*filter));
+ if (!filter)
+ goto fail;
if (filterSize >= MAX_FILTER_SIZE * 16 /
- ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
+ ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16)) {
+ av_log(NULL, AV_LOG_ERROR, "sws: filterSize %d is too large, try less extreme scaling or set --sws-max-filter-size and recompile\n",
+ FF_CEIL_RSHIFT((filterSize+1) * ((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16), 4));
goto fail;
+ }
*outFilterSize = filterSize;
if (flags & SWS_PRINT_INFO)
@@ -548,36 +609,34 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
// FIXME try to align filterPos if possible
// fix borders
- if (is_horizontal) {
- for (i = 0; i < dstW; i++) {
- int j;
- if ((*filterPos)[i] < 0) {
- // move filter coefficients left to compensate for filterPos
- for (j = 1; j < filterSize; j++) {
- int left = FFMAX(j + (*filterPos)[i], 0);
- filter[i * filterSize + left] += filter[i * filterSize + j];
- filter[i * filterSize + j] = 0;
- }
- (*filterPos)[i] = 0;
+ for (i = 0; i < dstW; i++) {
+ int j;
+ if ((*filterPos)[i] < 0) {
+ // move filter coefficients left to compensate for filterPos
+ for (j = 1; j < filterSize; j++) {
+ int left = FFMAX(j + (*filterPos)[i], 0);
+ filter[i * filterSize + left] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
}
+ (*filterPos)[i] = 0;
+ }
- if ((*filterPos)[i] + filterSize > srcW) {
- int shift = (*filterPos)[i] + filterSize - srcW;
- // move filter coefficients right to compensate for filterPos
- for (j = filterSize - 2; j >= 0; j--) {
- int right = FFMIN(j + shift, filterSize - 1);
- filter[i * filterSize + right] += filter[i * filterSize + j];
- filter[i * filterSize + j] = 0;
- }
- (*filterPos)[i] = srcW - filterSize;
+ if ((*filterPos)[i] + filterSize > srcW) {
+ int shift = (*filterPos)[i] + filterSize - srcW;
+ // move filter coefficients right to compensate for filterPos
+ for (j = filterSize - 2; j >= 0; j--) {
+ int right = FFMIN(j + shift, filterSize - 1);
+ filter[i * filterSize + right] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
}
+ (*filterPos)[i] = srcW - filterSize;
}
}
// Note the +1 is for the MMX scaler which reads over the end
/* align at 16 for AltiVec (needed by hScale_altivec_real) */
- FF_ALLOCZ_OR_GOTO(NULL, *outFilter,
- *outFilterSize * (dstW + 3) * sizeof(int16_t), fail);
+ FF_ALLOCZ_ARRAY_OR_GOTO(NULL, *outFilter,
+ (dstW + 3), *outFilterSize * sizeof(int16_t), fail);
/* normalize & store in outFilter */
for (i = 0; i < dstW; i++) {
@@ -589,6 +648,10 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
sum += filter[i * filterSize + j];
}
sum = (sum + one / 2) / one;
+ if (!sum) {
+ av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n");
+ sum = 1;
+ }
for (j = 0; j < *outFilterSize; j++) {
int64_t v = filter[i * filterSize + j] + error;
int intV = ROUNDED_DIV(v, sum);
@@ -611,211 +674,193 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
ret = 0;
fail:
+ if(ret < 0)
+ av_log(NULL, AV_LOG_ERROR, "sws: initFilter failed\n");
av_free(filter);
av_free(filter2);
return ret;
}
-#if HAVE_MMXEXT_INLINE
-static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
- int16_t *filter, int32_t *filterPos,
- int numSplits)
+static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange)
{
- uint8_t *fragmentA;
- x86_reg imm8OfPShufW1A;
- x86_reg imm8OfPShufW2A;
- x86_reg fragmentLengthA;
- uint8_t *fragmentB;
- x86_reg imm8OfPShufW1B;
- x86_reg imm8OfPShufW2B;
- x86_reg fragmentLengthB;
- int fragmentPos;
-
- int xpos, i;
-
- // create an optimized horizontal scaling routine
- /* This scaler is made of runtime-generated MMXEXT code using specially tuned
- * pshufw instructions. For every four output pixels, if four input pixels
- * are enough for the fast bilinear scaling, then a chunk of fragmentB is
- * used. If five input pixels are needed, then a chunk of fragmentA is used.
- */
-
- // code fragment
-
- __asm__ volatile (
- "jmp 9f \n\t"
- // Begin
- "0: \n\t"
- "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
- "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
- "movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "pshufw $0xFF, %%mm1, %%mm1 \n\t"
- "1: \n\t"
- "pshufw $0xFF, %%mm0, %%mm0 \n\t"
- "2: \n\t"
- "psubw %%mm1, %%mm0 \n\t"
- "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
- "pmullw %%mm3, %%mm0 \n\t"
- "psllw $7, %%mm1 \n\t"
- "paddw %%mm1, %%mm0 \n\t"
-
- "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
-
- "add $8, %%"REG_a" \n\t"
- // End
- "9: \n\t"
- // "int $3 \n\t"
- "lea " LOCAL_MANGLE(0b) ", %0 \n\t"
- "lea " LOCAL_MANGLE(1b) ", %1 \n\t"
- "lea " LOCAL_MANGLE(2b) ", %2 \n\t"
- "dec %1 \n\t"
- "dec %2 \n\t"
- "sub %0, %1 \n\t"
- "sub %0, %2 \n\t"
- "lea " LOCAL_MANGLE(9b) ", %3 \n\t"
- "sub %0, %3 \n\t"
-
-
- : "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
- "=r" (fragmentLengthA)
- );
-
- __asm__ volatile (
- "jmp 9f \n\t"
- // Begin
- "0: \n\t"
- "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
- "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "pshufw $0xFF, %%mm0, %%mm1 \n\t"
- "1: \n\t"
- "pshufw $0xFF, %%mm0, %%mm0 \n\t"
- "2: \n\t"
- "psubw %%mm1, %%mm0 \n\t"
- "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
- "pmullw %%mm3, %%mm0 \n\t"
- "psllw $7, %%mm1 \n\t"
- "paddw %%mm1, %%mm0 \n\t"
-
- "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
-
- "add $8, %%"REG_a" \n\t"
- // End
- "9: \n\t"
- // "int $3 \n\t"
- "lea " LOCAL_MANGLE(0b) ", %0 \n\t"
- "lea " LOCAL_MANGLE(1b) ", %1 \n\t"
- "lea " LOCAL_MANGLE(2b) ", %2 \n\t"
- "dec %1 \n\t"
- "dec %2 \n\t"
- "sub %0, %1 \n\t"
- "sub %0, %2 \n\t"
- "lea " LOCAL_MANGLE(9b) ", %3 \n\t"
- "sub %0, %3 \n\t"
-
-
- : "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
- "=r" (fragmentLengthB)
- );
-
- xpos = 0; // lumXInc/2 - 0x8000; // difference between pixel centers
- fragmentPos = 0;
-
- for (i = 0; i < dstW / numSplits; i++) {
- int xx = xpos >> 16;
-
- if ((i & 3) == 0) {
- int a = 0;
- int b = ((xpos + xInc) >> 16) - xx;
- int c = ((xpos + xInc * 2) >> 16) - xx;
- int d = ((xpos + xInc * 3) >> 16) - xx;
- int inc = (d + 1 < 4);
- uint8_t *fragment = (d + 1 < 4) ? fragmentB : fragmentA;
- x86_reg imm8OfPShufW1 = (d + 1 < 4) ? imm8OfPShufW1B : imm8OfPShufW1A;
- x86_reg imm8OfPShufW2 = (d + 1 < 4) ? imm8OfPShufW2B : imm8OfPShufW2A;
- x86_reg fragmentLength = (d + 1 < 4) ? fragmentLengthB : fragmentLengthA;
- int maxShift = 3 - (d + inc);
- int shift = 0;
-
- if (filterCode) {
- filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9;
- filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9;
- filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
- filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
- filterPos[i / 2] = xx;
-
- memcpy(filterCode + fragmentPos, fragment, fragmentLength);
-
- filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) |
- ((b + inc) << 2) |
- ((c + inc) << 4) |
- ((d + inc) << 6);
- filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) |
- (c << 4) |
- (d << 6);
-
- if (i + 4 - inc >= dstW)
- shift = maxShift; // avoid overread
- else if ((filterPos[i / 2] & 3) <= maxShift)
- shift = filterPos[i / 2] & 3; // align
-
- if (shift && i >= shift) {
- filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift;
- filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift;
- filterPos[i / 2] -= shift;
- }
- }
-
- fragmentPos += fragmentLength;
-
- if (filterCode)
- filterCode[fragmentPos] = RET;
- }
- xpos += xInc;
+ int64_t W, V, Z, Cy, Cu, Cv;
+ int64_t vr = table[0];
+ int64_t ub = table[1];
+ int64_t ug = -table[2];
+ int64_t vg = -table[3];
+ int64_t ONE = 65536;
+ int64_t cy = ONE;
+ uint8_t *p = (uint8_t*)c->input_rgb2yuv_table;
+ int i;
+ static const int8_t map[] = {
+ BY_IDX, GY_IDX, -1 , BY_IDX, BY_IDX, GY_IDX, -1 , BY_IDX,
+ RY_IDX, -1 , GY_IDX, RY_IDX, RY_IDX, -1 , GY_IDX, RY_IDX,
+ RY_IDX, GY_IDX, -1 , RY_IDX, RY_IDX, GY_IDX, -1 , RY_IDX,
+ BY_IDX, -1 , GY_IDX, BY_IDX, BY_IDX, -1 , GY_IDX, BY_IDX,
+ BU_IDX, GU_IDX, -1 , BU_IDX, BU_IDX, GU_IDX, -1 , BU_IDX,
+ RU_IDX, -1 , GU_IDX, RU_IDX, RU_IDX, -1 , GU_IDX, RU_IDX,
+ RU_IDX, GU_IDX, -1 , RU_IDX, RU_IDX, GU_IDX, -1 , RU_IDX,
+ BU_IDX, -1 , GU_IDX, BU_IDX, BU_IDX, -1 , GU_IDX, BU_IDX,
+ BV_IDX, GV_IDX, -1 , BV_IDX, BV_IDX, GV_IDX, -1 , BV_IDX,
+ RV_IDX, -1 , GV_IDX, RV_IDX, RV_IDX, -1 , GV_IDX, RV_IDX,
+ RV_IDX, GV_IDX, -1 , RV_IDX, RV_IDX, GV_IDX, -1 , RV_IDX,
+ BV_IDX, -1 , GV_IDX, BV_IDX, BV_IDX, -1 , GV_IDX, BV_IDX,
+ RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX,
+ BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX, BY_IDX, RY_IDX,
+ GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 ,
+ -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX, -1 , GY_IDX,
+ RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX,
+ BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX, BU_IDX, RU_IDX,
+ GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 ,
+ -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX, -1 , GU_IDX,
+ RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX,
+ BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX,
+ GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 ,
+ -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, //23
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //24
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //25
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //26
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //27
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //28
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //29
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //30
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //31
+ BY_IDX, GY_IDX, RY_IDX, -1 , -1 , -1 , -1 , -1 , //32
+ BU_IDX, GU_IDX, RU_IDX, -1 , -1 , -1 , -1 , -1 , //33
+ BV_IDX, GV_IDX, RV_IDX, -1 , -1 , -1 , -1 , -1 , //34
+ };
+
+ dstRange = 0; //FIXME range = 1 is handled elsewhere
+
+ if (!dstRange) {
+ cy = cy * 255 / 219;
+ } else {
+ vr = vr * 224 / 255;
+ ub = ub * 224 / 255;
+ ug = ug * 224 / 255;
+ vg = vg * 224 / 255;
}
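+ /* limited ("MPEG") range uses 219 luma levels (16..235) and 224 chroma
+ * levels (16..240) out of the full 0..255 excursion, hence the 255/219
+ * and 224/255 factors above */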
- if (filterCode)
- filterPos[((i / 2) + 1) & (~1)] = xpos >> 16; // needed to jump to the next part
-
- return fragmentPos + 1;
+ W = ROUNDED_DIV(ONE*ONE*ug, ub);
+ V = ROUNDED_DIV(ONE*ONE*vg, vr);
+ Z = ONE*ONE-W-V;
+
+ Cy = ROUNDED_DIV(cy*Z, ONE);
+ Cu = ROUNDED_DIV(ub*Z, ONE);
+ Cv = ROUNDED_DIV(vr*Z, ONE);
+
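+ /* the nine assignments below analytically invert the YUV->RGB matrix
+ * described by vr, ub, ug and vg, with ONE = 1 << 16 as the fixed-point
+ * unit; e.g. with the default BT.601 table, RY works out to
+ * 0.299 * 219 / 255 in RGB2YUV_SHIFT fixed point, exactly the constant
+ * installed in the special case further down */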
+ c->input_rgb2yuv_table[RY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cy);
+ c->input_rgb2yuv_table[GY_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cy);
+ c->input_rgb2yuv_table[BY_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cy);
+
+ c->input_rgb2yuv_table[RU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*V , Cu);
+ c->input_rgb2yuv_table[GU_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cu);
+ c->input_rgb2yuv_table[BU_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(Z+W) , Cu);
+
+ c->input_rgb2yuv_table[RV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*(V+Z) , Cv);
+ c->input_rgb2yuv_table[GV_IDX] = -ROUNDED_DIV((1 << RGB2YUV_SHIFT)*ONE*ONE , Cv);
+ c->input_rgb2yuv_table[BV_IDX] = ROUNDED_DIV((1 << RGB2YUV_SHIFT)*W , Cv);
+
+ if(/*!dstRange && */!memcmp(table, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], sizeof(ff_yuv2rgb_coeffs[SWS_CS_DEFAULT]))) {
+ c->input_rgb2yuv_table[BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+ c->input_rgb2yuv_table[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+ c->input_rgb2yuv_table[BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+ c->input_rgb2yuv_table[GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+ c->input_rgb2yuv_table[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+ c->input_rgb2yuv_table[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+ c->input_rgb2yuv_table[RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+ c->input_rgb2yuv_table[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+ c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
+ }
+ for(i=0; i<FF_ARRAY_ELEMS(map); i++)
+ AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0);
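+ /* c->input_rgb2yuv_table starts with the scalar coefficients (16*4
+ * bytes); map[] then lays out 16-byte vectors of eight words each, one
+ * row per vector, which is what the "16*4 + 16*n + tableq" offsets in
+ * x86/input.asm index into */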
}
-#endif /* HAVE_MMXEXT_INLINE */
-static void getSubSampleFactors(int *h, int *v, enum AVPixelFormat format)
+static void fill_xyztables(struct SwsContext *c)
{
- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
- *h = desc->log2_chroma_w;
- *v = desc->log2_chroma_h;
+ int i;
+ double xyzgamma = XYZ_GAMMA;
+ double rgbgamma = 1.0 / RGB_GAMMA;
+ double xyzgammainv = 1.0 / XYZ_GAMMA;
+ double rgbgammainv = RGB_GAMMA;
+ static const int16_t xyz2rgb_matrix[3][4] = {
+ {13270, -6295, -2041},
+ {-3969, 7682, 170},
+ { 228, -835, 4329} };
+ static const int16_t rgb2xyz_matrix[3][4] = {
+ {1689, 1464, 739},
+ { 871, 2929, 296},
+ { 79, 488, 3891} };
+ static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096], xyzgammainv_tab[4096], rgbgammainv_tab[4096];
+
+ memcpy(c->xyz2rgb_matrix, xyz2rgb_matrix, sizeof(c->xyz2rgb_matrix));
+ memcpy(c->rgb2xyz_matrix, rgb2xyz_matrix, sizeof(c->rgb2xyz_matrix));
+ c->xyzgamma = xyzgamma_tab;
+ c->rgbgamma = rgbgamma_tab;
+ c->xyzgammainv = xyzgammainv_tab;
+ c->rgbgammainv = rgbgammainv_tab;
+
+ if (rgbgamma_tab[4095])
+ return;
+
+ /* set gamma vectors */
+ for (i = 0; i < 4096; i++) {
+ xyzgamma_tab[i] = lrint(pow(i / 4095.0, xyzgamma) * 4095.0);
+ rgbgamma_tab[i] = lrint(pow(i / 4095.0, rgbgamma) * 4095.0);
+ xyzgammainv_tab[i] = lrint(pow(i / 4095.0, xyzgammainv) * 4095.0);
+ rgbgammainv_tab[i] = lrint(pow(i / 4095.0, rgbgammainv) * 4095.0);
+ }
}
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
int srcRange, const int table[4], int dstRange,
int brightness, int contrast, int saturation)
{
- const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat);
- const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(c->srcFormat);
- memcpy(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
- memcpy(c->dstColorspaceTable, table, sizeof(int) * 4);
+ const AVPixFmtDescriptor *desc_dst;
+ const AVPixFmtDescriptor *desc_src;
+ int need_reinit = 0;
+ memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
+ memmove(c->dstColorspaceTable, table, sizeof(int) * 4);
+
+ handle_formats(c);
+ desc_dst = av_pix_fmt_desc_get(c->dstFormat);
+ desc_src = av_pix_fmt_desc_get(c->srcFormat);
+
+ if(!isYUV(c->dstFormat) && !isGray(c->dstFormat))
+ dstRange = 0;
+ if(!isYUV(c->srcFormat) && !isGray(c->srcFormat))
+ srcRange = 0;
c->brightness = brightness;
c->contrast = contrast;
c->saturation = saturation;
+ if (c->srcRange != srcRange || c->dstRange != dstRange)
+ need_reinit = 1;
c->srcRange = srcRange;
c->dstRange = dstRange;
- if (isYUV(c->dstFormat) || isGray(c->dstFormat))
+
+ //The srcBpc check is possibly wrong but we seem to lack a definitive reference to test this
+ //and what we have in ticket 2939 looks better with this check
+ if (need_reinit && (c->srcBpc == 8 || !isYUV(c->srcFormat)))
+ ff_sws_init_range_convert(c);
+
+ if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat)))
return -1;
c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
c->srcFormatBpp = av_get_bits_per_pixel(desc_src);
- ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness,
- contrast, saturation);
- // FIXME factorize
+ if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) {
+ ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness,
+ contrast, saturation);
+ // FIXME factorize
+
+ if (ARCH_PPC)
+ ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness,
+ contrast, saturation);
+ }
+
+ fill_rgb2yuv_table(c, table, dstRange);
- if (ARCH_PPC)
- ff_yuv2rgb_init_tables_ppc(c, inv_table, brightness,
- contrast, saturation);
return 0;
}
@@ -823,7 +868,7 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
int *srcRange, int **table, int *dstRange,
int *brightness, int *contrast, int *saturation)
{
- if (isYUV(c->dstFormat) || isGray(c->dstFormat))
+ if (!c)
return -1;
*inv_table = c->srcColorspaceTable;
@@ -843,6 +888,9 @@ static int handle_jpeg(enum AVPixelFormat *format)
case AV_PIX_FMT_YUVJ420P:
*format = AV_PIX_FMT_YUV420P;
return 1;
+ case AV_PIX_FMT_YUVJ411P:
+ *format = AV_PIX_FMT_YUV411P;
+ return 1;
case AV_PIX_FMT_YUVJ422P:
*format = AV_PIX_FMT_YUV422P;
return 1;
@@ -852,15 +900,51 @@ static int handle_jpeg(enum AVPixelFormat *format)
case AV_PIX_FMT_YUVJ440P:
*format = AV_PIX_FMT_YUV440P;
return 1;
+ case AV_PIX_FMT_GRAY8:
+ case AV_PIX_FMT_GRAY16LE:
+ case AV_PIX_FMT_GRAY16BE:
+ return 1;
default:
return 0;
}
}
+static int handle_0alpha(enum AVPixelFormat *format)
+{
+ switch (*format) {
+ case AV_PIX_FMT_0BGR : *format = AV_PIX_FMT_ABGR ; return 1;
+ case AV_PIX_FMT_BGR0 : *format = AV_PIX_FMT_BGRA ; return 4;
+ case AV_PIX_FMT_0RGB : *format = AV_PIX_FMT_ARGB ; return 1;
+ case AV_PIX_FMT_RGB0 : *format = AV_PIX_FMT_RGBA ; return 4;
+ default: return 0;
+ }
+}
+
+static int handle_xyz(enum AVPixelFormat *format)
+{
+ switch (*format) {
+ case AV_PIX_FMT_XYZ12BE : *format = AV_PIX_FMT_RGB48BE; return 1;
+ case AV_PIX_FMT_XYZ12LE : *format = AV_PIX_FMT_RGB48LE; return 1;
+ default: return 0;
+ }
+}
+
+static void handle_formats(SwsContext *c)
+{
+ c->src0Alpha |= handle_0alpha(&c->srcFormat);
+ c->dst0Alpha |= handle_0alpha(&c->dstFormat);
+ c->srcXYZ |= handle_xyz(&c->srcFormat);
+ c->dstXYZ |= handle_xyz(&c->dstFormat);
+ if (c->srcXYZ || c->dstXYZ)
+ fill_xyztables(c);
+}
+
SwsContext *sws_alloc_context(void)
{
SwsContext *c = av_mallocz(sizeof(SwsContext));
+ av_assert0(offsetof(SwsContext, redDither) + DITHER32_INT == offsetof(SwsContext, dither32));
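+ /* the x86 asm addresses dither32 at a fixed byte offset (DITHER32_INT)
+ * from redDither, so verify the struct layout at runtime */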
+
if (c) {
c->av_class = &sws_context_class;
av_opt_set_defaults(c);
@@ -872,7 +956,7 @@ SwsContext *sws_alloc_context(void)
av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
{
- int i;
+ int i, j;
int usesVFilter, usesHFilter;
int unscaled;
SwsFilter dummyFilter = { NULL, NULL, NULL, NULL };
@@ -880,13 +964,12 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
int srcH = c->srcH;
int dstW = c->dstW;
int dstH = c->dstH;
- int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 16, 16);
- int dst_stride_px = dst_stride >> 1;
+ int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
int flags, cpu_flags;
enum AVPixelFormat srcFormat = c->srcFormat;
enum AVPixelFormat dstFormat = c->dstFormat;
- const AVPixFmtDescriptor *desc_src = av_pix_fmt_desc_get(srcFormat);
- const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(dstFormat);
+ const AVPixFmtDescriptor *desc_src;
+ const AVPixFmtDescriptor *desc_dst;
cpu_flags = av_get_cpu_flags();
flags = c->flags;
@@ -896,16 +979,33 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
unscaled = (srcW == dstW && srcH == dstH);
+ c->srcRange |= handle_jpeg(&c->srcFormat);
+ c->dstRange |= handle_jpeg(&c->dstFormat);
+
+ if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat)
+ av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n");
+
+ if (!c->contrast && !c->saturation && !c->dstFormatBpp)
+ sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange,
+ ff_yuv2rgb_coeffs[SWS_CS_DEFAULT],
+ c->dstRange, 0, 1 << 16, 1 << 16);
+
+ handle_formats(c);
+ srcFormat = c->srcFormat;
+ dstFormat = c->dstFormat;
+ desc_src = av_pix_fmt_desc_get(srcFormat);
+ desc_dst = av_pix_fmt_desc_get(dstFormat);
+
if (!(unscaled && sws_isSupportedEndiannessConversion(srcFormat) &&
av_pix_fmt_swap_endianness(srcFormat) == dstFormat)) {
if (!sws_isSupportedInput(srcFormat)) {
av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n",
- sws_format_name(srcFormat));
+ av_get_pix_fmt_name(srcFormat));
return AVERROR(EINVAL);
}
if (!sws_isSupportedOutput(dstFormat)) {
av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n",
- sws_format_name(dstFormat));
+ av_get_pix_fmt_name(dstFormat));
return AVERROR(EINVAL);
}
}
@@ -925,19 +1025,19 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
/* provide a default scaler if not set by caller */
if (!i) {
if (dstW < srcW && dstH < srcH)
- flags |= SWS_GAUSS;
+ flags |= SWS_BICUBIC;
else if (dstW > srcW && dstH > srcH)
- flags |= SWS_SINC;
+ flags |= SWS_BICUBIC;
else
- flags |= SWS_LANCZOS;
+ flags |= SWS_BICUBIC;
c->flags = flags;
} else if (i & (i - 1)) {
av_log(c, AV_LOG_ERROR,
- "Exactly one scaler algorithm must be chosen\n");
+ "Exactly one scaler algorithm must be chosen, got %X\n", i);
return AVERROR(EINVAL);
}
/* sanity check */
- if (srcW < 4 || srcH < 1 || dstW < 8 || dstH < 1) {
+ if (srcW < 1 || srcH < 1 || dstW < 1 || dstH < 1) {
/* FIXME check if these are enough and try to lower them after
* fixing the relevant parts of the code */
av_log(c, AV_LOG_ERROR, "%dx%d -> %dx%d is invalid scaling dimension\n",
@@ -965,9 +1065,56 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
(dstFilter->lumH && dstFilter->lumH->length > 1) ||
(dstFilter->chrH && dstFilter->chrH->length > 1);
- getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
- getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
+ av_pix_fmt_get_chroma_sub_sample(srcFormat, &c->chrSrcHSubSample, &c->chrSrcVSubSample);
+ av_pix_fmt_get_chroma_sub_sample(dstFormat, &c->chrDstHSubSample, &c->chrDstVSubSample);
+
+ if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) {
+ if (dstW&1) {
+ av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n");
+ flags |= SWS_FULL_CHR_H_INT;
+ c->flags = flags;
+ }
+
+ if ( c->chrSrcHSubSample == 0
+ && c->chrSrcVSubSample == 0
+ && c->dither != SWS_DITHER_BAYER //SWS_FULL_CHR_H_INT is currently not supported with SWS_DITHER_BAYER
+ && !(c->flags & SWS_FAST_BILINEAR)
+ ) {
+ av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to input having non subsampled chroma\n");
+ flags |= SWS_FULL_CHR_H_INT;
+ c->flags = flags;
+ }
+ }
+
+ if (c->dither == SWS_DITHER_AUTO) {
+ if (flags & SWS_ERROR_DIFFUSION)
+ c->dither = SWS_DITHER_ED;
+ }
+ if(dstFormat == AV_PIX_FMT_BGR4_BYTE ||
+ dstFormat == AV_PIX_FMT_RGB4_BYTE ||
+ dstFormat == AV_PIX_FMT_BGR8 ||
+ dstFormat == AV_PIX_FMT_RGB8) {
+ if (c->dither == SWS_DITHER_AUTO)
+ c->dither = (flags & SWS_FULL_CHR_H_INT) ? SWS_DITHER_ED : SWS_DITHER_BAYER;
+ if (!(flags & SWS_FULL_CHR_H_INT)) {
+ if (c->dither == SWS_DITHER_ED || c->dither == SWS_DITHER_A_DITHER || c->dither == SWS_DITHER_X_DITHER) {
+ av_log(c, AV_LOG_DEBUG,
+ "Desired dithering only supported in full chroma interpolation for destination format '%s'\n",
+ av_get_pix_fmt_name(dstFormat));
+ flags |= SWS_FULL_CHR_H_INT;
+ c->flags = flags;
+ }
+ }
+ if (flags & SWS_FULL_CHR_H_INT) {
+ if (c->dither == SWS_DITHER_BAYER) {
+ av_log(c, AV_LOG_DEBUG,
+ "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n",
+ av_get_pix_fmt_name(dstFormat));
+ c->dither = SWS_DITHER_ED;
+ }
+ }
+ }
if (isPlanarRGB(dstFormat)) {
if (!(flags & SWS_FULL_CHR_H_INT)) {
av_log(c, AV_LOG_DEBUG,
@@ -988,10 +1135,15 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
dstFormat != AV_PIX_FMT_BGRA &&
dstFormat != AV_PIX_FMT_ABGR &&
dstFormat != AV_PIX_FMT_RGB24 &&
- dstFormat != AV_PIX_FMT_BGR24) {
- av_log(c, AV_LOG_ERROR,
+ dstFormat != AV_PIX_FMT_BGR24 &&
+ dstFormat != AV_PIX_FMT_BGR4_BYTE &&
+ dstFormat != AV_PIX_FMT_RGB4_BYTE &&
+ dstFormat != AV_PIX_FMT_BGR8 &&
+ dstFormat != AV_PIX_FMT_RGB8
+ ) {
+ av_log(c, AV_LOG_WARNING,
"full chroma interpolation for destination format '%s' not yet implemented\n",
- sws_format_name(dstFormat));
+ av_get_pix_fmt_name(dstFormat));
flags &= ~SWS_FULL_CHR_H_INT;
c->flags = flags;
}
@@ -1011,30 +1163,20 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
srcFormat != AV_PIX_FMT_RGB4_BYTE && srcFormat != AV_PIX_FMT_BGR4_BYTE &&
srcFormat != AV_PIX_FMT_GBRP9BE && srcFormat != AV_PIX_FMT_GBRP9LE &&
srcFormat != AV_PIX_FMT_GBRP10BE && srcFormat != AV_PIX_FMT_GBRP10LE &&
+ srcFormat != AV_PIX_FMT_GBRP12BE && srcFormat != AV_PIX_FMT_GBRP12LE &&
+ srcFormat != AV_PIX_FMT_GBRP14BE && srcFormat != AV_PIX_FMT_GBRP14LE &&
srcFormat != AV_PIX_FMT_GBRP16BE && srcFormat != AV_PIX_FMT_GBRP16LE &&
((dstW >> c->chrDstHSubSample) <= (srcW >> 1) ||
(flags & SWS_FAST_BILINEAR)))
c->chrSrcHSubSample = 1;
- // Note the -((-x)>>y) is so that we always round toward +inf.
- c->chrSrcW = -((-srcW) >> c->chrSrcHSubSample);
- c->chrSrcH = -((-srcH) >> c->chrSrcVSubSample);
- c->chrDstW = -((-dstW) >> c->chrDstHSubSample);
- c->chrDstH = -((-dstH) >> c->chrDstVSubSample);
+ // Note the FF_CEIL_RSHIFT is so that we always round toward +inf.
+ c->chrSrcW = FF_CEIL_RSHIFT(srcW, c->chrSrcHSubSample);
+ c->chrSrcH = FF_CEIL_RSHIFT(srcH, c->chrSrcVSubSample);
+ c->chrDstW = FF_CEIL_RSHIFT(dstW, c->chrDstHSubSample);
+ c->chrDstH = FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample);
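+ // e.g. srcW = 101 with chrSrcHSubSample == 1 gives chrSrcW = 51, not 50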
- /* unscaled special cases */
- if (unscaled && !usesHFilter && !usesVFilter &&
- (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) {
- ff_get_unscaled_swscale(c);
-
- if (c->swscale) {
- if (flags & SWS_PRINT_INFO)
- av_log(c, AV_LOG_INFO,
- "using unscaled %s -> %s special converter\n",
- sws_format_name(srcFormat), sws_format_name(dstFormat));
- return 0;
- }
- }
+ FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
c->srcBpc = 1 + desc_src->comp[0].depth_minus1;
if (c->srcBpc < 8)
@@ -1042,21 +1184,23 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->dstBpc = 1 + desc_dst->comp[0].depth_minus1;
if (c->dstBpc < 8)
c->dstBpc = 8;
+ if (isAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8)
+ c->srcBpc = 16;
if (c->dstBpc == 16)
dst_stride <<= 1;
- FF_ALLOC_OR_GOTO(c, c->formatConvBuffer,
- (FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16,
- fail);
- if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 10) {
- c->canMMXEXTBeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
- (srcW & 15) == 0) ? 1 : 0;
- if (!c->canMMXEXTBeUsed && dstW >= srcW && (srcW & 15) == 0
+
+ if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) {
+ c->canMMXEXTBeUsed = dstW >= srcW && (dstW & 31) == 0 &&
+ c->chrDstW >= c->chrSrcW &&
+ (srcW & 15) == 0;
+ if (!c->canMMXEXTBeUsed && dstW >= srcW && c->chrDstW >= c->chrSrcW && (srcW & 15) == 0
+
&& (flags & SWS_FAST_BILINEAR)) {
if (flags & SWS_PRINT_INFO)
av_log(c, AV_LOG_INFO,
"output width is not a multiple of 32 -> no MMXEXT scaler\n");
}
- if (usesHFilter)
+ if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat))
c->canMMXEXTBeUsed = 0;
} else
c->canMMXEXTBeUsed = 0;
@@ -1077,7 +1221,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->chrXInc += 20;
}
// we don't use the x86 asm scaler if MMX is available
- else if (INLINE_MMX(cpu_flags)) {
+ else if (INLINE_MMX(cpu_flags) && c->dstBpc <= 14) {
c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20;
c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
}
@@ -1090,9 +1234,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
#if HAVE_MMXEXT_INLINE
// can't downscale !!!
if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) {
- c->lumMmxextFilterCodeSize = init_hscaler_mmxext(dstW, c->lumXInc, NULL,
+ c->lumMmxextFilterCodeSize = ff_init_hscaler_mmxext(dstW, c->lumXInc, NULL,
NULL, NULL, 8);
- c->chrMmxextFilterCodeSize = init_hscaler_mmxext(c->chrDstW, c->chrXInc,
+ c->chrMmxextFilterCodeSize = ff_init_hscaler_mmxext(c->chrDstW, c->chrXInc,
NULL, NULL, NULL, 4);
#if USE_MMAP
@@ -1118,21 +1262,32 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->chrMmxextFilterCode = av_malloc(c->chrMmxextFilterCodeSize);
#endif
+#ifdef MAP_ANONYMOUS
+ if (c->lumMmxextFilterCode == MAP_FAILED || c->chrMmxextFilterCode == MAP_FAILED)
+#else
if (!c->lumMmxextFilterCode || !c->chrMmxextFilterCode)
+#endif
+ {
+ av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n");
return AVERROR(ENOMEM);
+ }
+
FF_ALLOCZ_OR_GOTO(c, c->hLumFilter, (dstW / 8 + 8) * sizeof(int16_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hChrFilter, (c->chrDstW / 4 + 8) * sizeof(int16_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
- init_hscaler_mmxext(dstW, c->lumXInc, c->lumMmxextFilterCode,
- c->hLumFilter, c->hLumFilterPos, 8);
- init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
- c->hChrFilter, c->hChrFilterPos, 4);
+ ff_init_hscaler_mmxext( dstW, c->lumXInc, c->lumMmxextFilterCode,
+ c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8);
+ ff_init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
+ c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4);
#if USE_MMAP
- mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ);
- mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ);
+ if ( mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1
+ || mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ) == -1) {
+ av_log(c, AV_LOG_ERROR, "mprotect failed, cannot use fast bilinear scaler\n");
+ goto fail;
+ }
#endif
} else
#endif /* HAVE_MMXEXT_INLINE */
@@ -1145,14 +1300,18 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
srcW, dstW, filterAlign, 1 << 14,
(flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
cpu_flags, srcFilter->lumH, dstFilter->lumH,
- c->param, 1) < 0)
+ c->param,
+ get_local_pos(c, 0, 0, 0),
+ get_local_pos(c, 0, 0, 0)) < 0)
goto fail;
if (initFilter(&c->hChrFilter, &c->hChrFilterPos,
&c->hChrFilterSize, c->chrXInc,
c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
(flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
cpu_flags, srcFilter->chrH, dstFilter->chrH,
- c->param, 1) < 0)
+ c->param,
+ get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0),
+ get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0)) < 0)
goto fail;
}
} // initialize horizontal stuff
@@ -1166,14 +1325,19 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
(flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
cpu_flags, srcFilter->lumV, dstFilter->lumV,
- c->param, 0) < 0)
+ c->param,
+ get_local_pos(c, 0, 0, 1),
+ get_local_pos(c, 0, 0, 1)) < 0)
goto fail;
if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
c->chrYInc, c->chrSrcH, c->chrDstH,
filterAlign, (1 << 12),
(flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
cpu_flags, srcFilter->chrV, dstFilter->chrV,
- c->param, 0) < 0)
+ c->param,
+ get_local_pos(c, c->chrSrcVSubSample, c->src_v_chr_pos, 1),
+ get_local_pos(c, c->chrDstVSubSample, c->dst_v_chr_pos, 1)) < 0)
+
goto fail;
#if HAVE_ALTIVEC
@@ -1215,6 +1379,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->vChrFilterPos[chrI];
}
+ for (i = 0; i < 4; i++)
+ FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+2) * sizeof(int), fail);
+
/* Allocate pixbufs (we use dynamic allocation because otherwise we would
* need to allocate several megabytes to handle all possible cases) */
FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail);
@@ -1230,9 +1397,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
dst_stride + 16, fail);
c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize];
}
- // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate)
- c->uv_off_px = dst_stride_px + 64 / (c->dstBpc & ~7);
- c->uv_off_byte = dst_stride + 16;
+ // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate)
+ c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
+ c->uv_offx2 = dst_stride + 16;
for (i = 0; i < c->vChrBufSize; i++) {
FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize],
dst_stride * 2 + 32, fail);
@@ -1249,38 +1416,30 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
// try to avoid drawing green stuff between the right end and the stride end
for (i = 0; i < c->vChrBufSize; i++)
- memset(c->chrUPixBuf[i], 64, dst_stride * 2 + 1);
+ if(desc_dst->comp[0].depth_minus1 == 15){
+ av_assert0(c->dstBpc > 14);
+ for(j=0; j<dst_stride/2+1; j++)
+ ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
+ } else
+ for(j=0; j<dst_stride+1; j++)
+ ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14;
- assert(c->chrDstH <= dstH);
+ av_assert0(c->chrDstH <= dstH);
if (flags & SWS_PRINT_INFO) {
- if (flags & SWS_FAST_BILINEAR)
- av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, ");
- else if (flags & SWS_BILINEAR)
- av_log(c, AV_LOG_INFO, "BILINEAR scaler, ");
- else if (flags & SWS_BICUBIC)
- av_log(c, AV_LOG_INFO, "BICUBIC scaler, ");
- else if (flags & SWS_X)
- av_log(c, AV_LOG_INFO, "Experimental scaler, ");
- else if (flags & SWS_POINT)
- av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, ");
- else if (flags & SWS_AREA)
- av_log(c, AV_LOG_INFO, "Area Averaging scaler, ");
- else if (flags & SWS_BICUBLIN)
- av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, ");
- else if (flags & SWS_GAUSS)
- av_log(c, AV_LOG_INFO, "Gaussian scaler, ");
- else if (flags & SWS_SINC)
- av_log(c, AV_LOG_INFO, "Sinc scaler, ");
- else if (flags & SWS_LANCZOS)
- av_log(c, AV_LOG_INFO, "Lanczos scaler, ");
- else if (flags & SWS_SPLINE)
- av_log(c, AV_LOG_INFO, "Bicubic spline scaler, ");
- else
- av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
+ const char *scaler = NULL, *cpucaps;
- av_log(c, AV_LOG_INFO, "from %s to %s%s ",
- sws_format_name(srcFormat),
+ for (i = 0; i < FF_ARRAY_ELEMS(scale_algorithms); i++) {
+ if (flags & scale_algorithms[i].flag) {
+ scaler = scale_algorithms[i].description;
+ break;
+ }
+ }
+ if (!scaler)
+ scaler = "ehh flags invalid?!";
+ av_log(c, AV_LOG_INFO, "%s scaler, from %s to %s%s ",
+ scaler,
+ av_get_pix_fmt_name(srcFormat),
#ifdef DITHER1XBPP
dstFormat == AV_PIX_FMT_BGR555 || dstFormat == AV_PIX_FMT_BGR565 ||
dstFormat == AV_PIX_FMT_RGB444BE || dstFormat == AV_PIX_FMT_RGB444LE ||
@@ -1289,18 +1448,20 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
#else
"",
#endif
- sws_format_name(dstFormat));
+ av_get_pix_fmt_name(dstFormat));
if (INLINE_MMXEXT(cpu_flags))
- av_log(c, AV_LOG_INFO, "using MMXEXT\n");
+ cpucaps = "MMXEXT";
else if (INLINE_AMD3DNOW(cpu_flags))
- av_log(c, AV_LOG_INFO, "using 3DNOW\n");
+ cpucaps = "3DNOW";
else if (INLINE_MMX(cpu_flags))
- av_log(c, AV_LOG_INFO, "using MMX\n");
+ cpucaps = "MMX";
else if (PPC_ALTIVEC(cpu_flags))
- av_log(c, AV_LOG_INFO, "using AltiVec\n");
+ cpucaps = "AltiVec";
else
- av_log(c, AV_LOG_INFO, "using C\n");
+ cpucaps = "C";
+
+ av_log(c, AV_LOG_INFO, "using %s\n", cpucaps);
av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
av_log(c, AV_LOG_DEBUG,
@@ -1312,6 +1473,20 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->chrXInc, c->chrYInc);
}
+ /* unscaled special cases */
+ if (unscaled && !usesHFilter && !usesVFilter &&
+ (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) {
+ ff_get_unscaled_swscale(c);
+
+ if (c->swscale) {
+ if (flags & SWS_PRINT_INFO)
+ av_log(c, AV_LOG_INFO,
+ "using unscaled %s -> %s special converter\n",
+ av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
+ return 0;
+ }
+ }
+
c->swscale = ff_getSwsFunc(c);
return 0;
fail: // FIXME replace things by appropriate error codes
@@ -1333,8 +1508,6 @@ SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
c->srcH = srcH;
c->dstW = dstW;
c->dstH = dstH;
- c->srcRange = handle_jpeg(&srcFormat);
- c->dstRange = handle_jpeg(&dstFormat);
c->srcFormat = srcFormat;
c->dstFormat = dstFormat;
@@ -1342,9 +1515,6 @@ SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
c->param[0] = param[0];
c->param[1] = param[1];
}
- sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange,
- ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/,
- c->dstRange, 0, 1 << 16, 1 << 16);
if (sws_init_context(c, srcFilter, dstFilter) < 0) {
sws_freeContext(c);
@@ -1418,7 +1588,12 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
SwsVector *sws_allocVec(int length)
{
- SwsVector *vec = av_malloc(sizeof(SwsVector));
+ SwsVector *vec;
+
+ if (length <= 0 || length > INT_MAX / sizeof(double))
+ return NULL;
+
+ vec = av_malloc(sizeof(SwsVector));
if (!vec)
return NULL;
vec->length = length;
@@ -1433,7 +1608,12 @@ SwsVector *sws_getGaussianVec(double variance, double quality)
const int length = (int)(variance * quality + 0.5) | 1;
int i;
double middle = (length - 1) * 0.5;
- SwsVector *vec = sws_allocVec(length);
+ SwsVector *vec;
+
+ if(variance < 0 || quality < 0)
+ return NULL;
+
+ vec = sws_allocVec(length);
if (!vec)
return NULL;
@@ -1600,14 +1780,12 @@ void sws_convVec(SwsVector *a, SwsVector *b)
SwsVector *sws_cloneVec(SwsVector *a)
{
- int i;
SwsVector *vec = sws_allocVec(a->length);
if (!vec)
return NULL;
- for (i = 0; i < a->length; i++)
- vec->coeff[i] = a->coeff[i];
+ memcpy(vec->coeff, a->coeff, a->length * sizeof(*a->coeff));
return vec;
}
@@ -1652,14 +1830,10 @@ void sws_freeFilter(SwsFilter *filter)
if (!filter)
return;
- if (filter->lumH)
- sws_freeVec(filter->lumH);
- if (filter->lumV)
- sws_freeVec(filter->lumV);
- if (filter->chrH)
- sws_freeVec(filter->chrH);
- if (filter->chrV)
- sws_freeVec(filter->chrV);
+ sws_freeVec(filter->lumH);
+ sws_freeVec(filter->lumV);
+ sws_freeVec(filter->chrH);
+ sws_freeVec(filter->chrV);
av_free(filter);
}
@@ -1688,6 +1862,9 @@ void sws_freeContext(SwsContext *c)
av_freep(&c->alpPixBuf);
}
+ for (i = 0; i < 4; i++)
+ av_freep(&c->dither_error[i]);
+
av_freep(&c->vLumFilter);
av_freep(&c->vChrFilter);
av_freep(&c->hLumFilter);
@@ -1722,7 +1899,7 @@ void sws_freeContext(SwsContext *c)
#endif /* HAVE_MMX_INLINE */
av_freep(&c->yuvTable);
- av_free(c->formatConvBuffer);
+ av_freep(&c->formatConvBuffer);
av_free(c);
}
@@ -1760,19 +1937,13 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW,
return NULL;
context->srcW = srcW;
context->srcH = srcH;
- context->srcRange = handle_jpeg(&srcFormat);
context->srcFormat = srcFormat;
context->dstW = dstW;
context->dstH = dstH;
- context->dstRange = handle_jpeg(&dstFormat);
context->dstFormat = dstFormat;
context->flags = flags;
context->param[0] = param[0];
context->param[1] = param[1];
- sws_setColorspaceDetails(context, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT],
- context->srcRange,
- ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/,
- context->dstRange, 0, 1 << 16, 1 << 16);
if (sws_init_context(context, srcFilter, dstFilter) < 0) {
sws_freeContext(context);
return NULL;
diff --git a/libswscale/version.h b/libswscale/version.h
index 7213ab3..09da910 100644
--- a/libswscale/version.h
+++ b/libswscale/version.h
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -27,8 +27,8 @@
#include "libavutil/version.h"
#define LIBSWSCALE_VERSION_MAJOR 2
-#define LIBSWSCALE_VERSION_MINOR 1
-#define LIBSWSCALE_VERSION_MICRO 3
+#define LIBSWSCALE_VERSION_MINOR 6
+#define LIBSWSCALE_VERSION_MICRO 101
#define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
LIBSWSCALE_VERSION_MINOR, \
@@ -49,6 +49,9 @@
#ifndef FF_API_SWS_CPU_CAPS
#define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 3)
#endif
+#ifndef FF_API_SWS_FORMAT_NAME
+#define FF_API_SWS_FORMAT_NAME (LIBSWSCALE_VERSION_MAJOR < 3)
+#endif
#ifndef FF_API_ARCH_BFIN
#define FF_API_ARCH_BFIN (LIBSWSCALE_VERSION_MAJOR < 3)
#endif
diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index b94b14a..6901207 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -1,7 +1,11 @@
+$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
+
OBJS += x86/rgb2rgb.o \
x86/swscale.o \
x86/yuv2rgb.o \
+MMX-OBJS += x86/hscale_fast_bilinear_simd.o \
+
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
YASM-OBJS += x86/input.o \
diff --git a/libswscale/x86/hscale_fast_bilinear_simd.c b/libswscale/x86/hscale_fast_bilinear_simd.c
new file mode 100644
index 0000000..103793d
--- /dev/null
+++ b/libswscale/x86/hscale_fast_bilinear_simd.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../swscale_internal.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+
+#define RET 0xC3 // near return opcode for x86
+#define PREFETCH "prefetchnta"
+
+#if HAVE_INLINE_ASM
+av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
+ int16_t *filter, int32_t *filterPos,
+ int numSplits)
+{
+ uint8_t *fragmentA;
+ x86_reg imm8OfPShufW1A;
+ x86_reg imm8OfPShufW2A;
+ x86_reg fragmentLengthA;
+ uint8_t *fragmentB;
+ x86_reg imm8OfPShufW1B;
+ x86_reg imm8OfPShufW2B;
+ x86_reg fragmentLengthB;
+ int fragmentPos;
+
+ int xpos, i;
+
+ // create an optimized horizontal scaling routine
+ /* This scaler is made of runtime-generated MMXEXT code using specially tuned
+ * pshufw instructions. For every four output pixels, if four input pixels
+ * are enough for the fast bilinear scaling, then a chunk of fragmentB is
+ * used. If five input pixels are needed, then a chunk of fragmentA is used.
+ */
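+ /* Neither asm block below is meant to execute here: each jumps straight
+ * to its end label and merely exports, through lea/sub, the fragment's
+ * start address, its length and the offsets of the two pshufw immediate
+ * bytes. The loop further down memcpy()s the chosen fragment into
+ * filterCode and patches those immediates for each group of four output
+ * pixels. */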
+
+ // code fragment
+
+ __asm__ volatile (
+ "jmp 9f \n\t"
+ // Begin
+ "0: \n\t"
+ "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
+ "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
+ "movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "pshufw $0xFF, %%mm1, %%mm1 \n\t"
+ "1: \n\t"
+ "pshufw $0xFF, %%mm0, %%mm0 \n\t"
+ "2: \n\t"
+ "psubw %%mm1, %%mm0 \n\t"
+ "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
+ "pmullw %%mm3, %%mm0 \n\t"
+ "psllw $7, %%mm1 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+
+ "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
+
+ "add $8, %%"REG_a" \n\t"
+ // End
+ "9: \n\t"
+ // "int $3 \n\t"
+ "lea " LOCAL_MANGLE(0b) ", %0 \n\t"
+ "lea " LOCAL_MANGLE(1b) ", %1 \n\t"
+ "lea " LOCAL_MANGLE(2b) ", %2 \n\t"
+ "dec %1 \n\t"
+ "dec %2 \n\t"
+ "sub %0, %1 \n\t"
+ "sub %0, %2 \n\t"
+ "lea " LOCAL_MANGLE(9b) ", %3 \n\t"
+ "sub %0, %3 \n\t"
+
+
+ : "=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
+ "=r" (fragmentLengthA)
+ );
+
+ __asm__ volatile (
+ "jmp 9f \n\t"
+ // Begin
+ "0: \n\t"
+ "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
+ "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "pshufw $0xFF, %%mm0, %%mm1 \n\t"
+ "1: \n\t"
+ "pshufw $0xFF, %%mm0, %%mm0 \n\t"
+ "2: \n\t"
+ "psubw %%mm1, %%mm0 \n\t"
+ "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
+ "pmullw %%mm3, %%mm0 \n\t"
+ "psllw $7, %%mm1 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+
+ "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
+
+ "add $8, %%"REG_a" \n\t"
+ // End
+ "9: \n\t"
+ // "int $3 \n\t"
+ "lea " LOCAL_MANGLE(0b) ", %0 \n\t"
+ "lea " LOCAL_MANGLE(1b) ", %1 \n\t"
+ "lea " LOCAL_MANGLE(2b) ", %2 \n\t"
+ "dec %1 \n\t"
+ "dec %2 \n\t"
+ "sub %0, %1 \n\t"
+ "sub %0, %2 \n\t"
+ "lea " LOCAL_MANGLE(9b) ", %3 \n\t"
+ "sub %0, %3 \n\t"
+
+
+ : "=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
+ "=r" (fragmentLengthB)
+ );
+
+ xpos = 0; // lumXInc/2 - 0x8000; // difference between pixel centers
+ fragmentPos = 0;
+
+ for (i = 0; i < dstW / numSplits; i++) {
+ int xx = xpos >> 16;
+
+ if ((i & 3) == 0) {
+ int a = 0;
+ int b = ((xpos + xInc) >> 16) - xx;
+ int c = ((xpos + xInc * 2) >> 16) - xx;
+ int d = ((xpos + xInc * 3) >> 16) - xx;
+ int inc = (d + 1 < 4);
+ uint8_t *fragment = inc ? fragmentB : fragmentA;
+ x86_reg imm8OfPShufW1 = inc ? imm8OfPShufW1B : imm8OfPShufW1A;
+ x86_reg imm8OfPShufW2 = inc ? imm8OfPShufW2B : imm8OfPShufW2A;
+ x86_reg fragmentLength = inc ? fragmentLengthB : fragmentLengthA;
+ int maxShift = 3 - (d + inc);
+ int shift = 0;
+
+ if (filterCode) {
+ filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9;
+ filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9;
+ filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
+ filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
+ filterPos[i / 2] = xx;
+
+ memcpy(filterCode + fragmentPos, fragment, fragmentLength);
+
+ filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) |
+ ((b + inc) << 2) |
+ ((c + inc) << 4) |
+ ((d + inc) << 6);
+ filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) |
+ (c << 4) |
+ (d << 6);
+
+ if (i + 4 - inc >= dstW)
+ shift = maxShift; // avoid overread
+ else if ((filterPos[i / 2] & 3) <= maxShift)
+ shift = filterPos[i / 2] & 3; // align
+
+ if (shift && i >= shift) {
+ filterCode[fragmentPos + imm8OfPShufW1] += 0x55 * shift;
+ filterCode[fragmentPos + imm8OfPShufW2] += 0x55 * shift;
+ filterPos[i / 2] -= shift;
+ }
+ }
+
+ fragmentPos += fragmentLength;
+
+ if (filterCode)
+ filterCode[fragmentPos] = RET;
+ }
+ xpos += xInc;
+ }
+ if (filterCode)
+ filterPos[((i / 2) + 1) & (~1)] = xpos >> 16; // needed to jump to the next part
+
+ return fragmentPos + 1;
+}
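+
+/* sws_init_context() calls this twice per scaler: first with
+ * filterCode == NULL only to learn the generated code size, then with
+ * the executable buffer to actually emit and patch the fragments. */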
+
+void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
+ int dstWidth, const uint8_t *src,
+ int srcW, int xInc)
+{
+ int32_t *filterPos = c->hLumFilterPos;
+ int16_t *filter = c->hLumFilter;
+ void *mmxextFilterCode = c->lumMmxextFilterCode;
+ int i;
+#if defined(PIC)
+ uint64_t ebxsave;
+#endif
+#if ARCH_X86_64
+ uint64_t retsave;
+#endif
+
+ __asm__ volatile(
+#if defined(PIC)
+ "mov %%"REG_b", %5 \n\t"
+#if ARCH_X86_64
+ "mov -8(%%rsp), %%"REG_a" \n\t"
+ "mov %%"REG_a", %6 \n\t"
+#endif
+#else
+#if ARCH_X86_64
+ "mov -8(%%rsp), %%"REG_a" \n\t"
+ "mov %%"REG_a", %5 \n\t"
+#endif
+#endif
+ "pxor %%mm7, %%mm7 \n\t"
+ "mov %0, %%"REG_c" \n\t"
+ "mov %1, %%"REG_D" \n\t"
+ "mov %2, %%"REG_d" \n\t"
+ "mov %3, %%"REG_b" \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t" // i
+ PREFETCH" (%%"REG_c") \n\t"
+ PREFETCH" 32(%%"REG_c") \n\t"
+ PREFETCH" 64(%%"REG_c") \n\t"
+
+#if ARCH_X86_64
+#define CALL_MMXEXT_FILTER_CODE \
+ "movl (%%"REG_b"), %%esi \n\t"\
+ "call *%4 \n\t"\
+ "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
+ "add %%"REG_S", %%"REG_c" \n\t"\
+ "add %%"REG_a", %%"REG_D" \n\t"\
+ "xor %%"REG_a", %%"REG_a" \n\t"\
+
+#else
+#define CALL_MMXEXT_FILTER_CODE \
+ "movl (%%"REG_b"), %%esi \n\t"\
+ "call *%4 \n\t"\
+ "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
+ "add %%"REG_a", %%"REG_D" \n\t"\
+ "xor %%"REG_a", %%"REG_a" \n\t"\
+
+#endif /* ARCH_X86_64 */
+
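+ /* each expansion below runs one numSplits-th of the generated scaler
+ * via "call *%4", then advances the source pointer by the next entry of
+ * the filterPos table kept in REG_b */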
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+
+#if defined(PIC)
+ "mov %5, %%"REG_b" \n\t"
+#if ARCH_X86_64
+ "mov %6, %%"REG_a" \n\t"
+ "mov %%"REG_a", -8(%%rsp) \n\t"
+#endif
+#else
+#if ARCH_X86_64
+ "mov %5, %%"REG_a" \n\t"
+ "mov %%"REG_a", -8(%%rsp) \n\t"
+#endif
+#endif
+ :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
+ "m" (mmxextFilterCode)
+#if defined(PIC)
+ ,"m" (ebxsave)
+#endif
+#if ARCH_X86_64
+ ,"m"(retsave)
+#endif
+ : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+#if !defined(PIC)
+ ,"%"REG_b
+#endif
+ );
+
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+ dst[i] = src[srcW-1]*128;
+}
+
+void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
+ int dstWidth, const uint8_t *src1,
+ const uint8_t *src2, int srcW, int xInc)
+{
+ int32_t *filterPos = c->hChrFilterPos;
+ int16_t *filter = c->hChrFilter;
+ void *mmxextFilterCode = c->chrMmxextFilterCode;
+ int i;
+#if defined(PIC)
+ DECLARE_ALIGNED(8, uint64_t, ebxsave);
+#endif
+#if ARCH_X86_64
+ DECLARE_ALIGNED(8, uint64_t, retsave);
+#endif
+
+ __asm__ volatile(
+#if defined(PIC)
+ "mov %%"REG_b", %7 \n\t"
+#if ARCH_X86_64
+ "mov -8(%%rsp), %%"REG_a" \n\t"
+ "mov %%"REG_a", %8 \n\t"
+#endif
+#else
+#if ARCH_X86_64
+ "mov -8(%%rsp), %%"REG_a" \n\t"
+ "mov %%"REG_a", %7 \n\t"
+#endif
+#endif
+ "pxor %%mm7, %%mm7 \n\t"
+ "mov %0, %%"REG_c" \n\t"
+ "mov %1, %%"REG_D" \n\t"
+ "mov %2, %%"REG_d" \n\t"
+ "mov %3, %%"REG_b" \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t" // i
+ PREFETCH" (%%"REG_c") \n\t"
+ PREFETCH" 32(%%"REG_c") \n\t"
+ PREFETCH" 64(%%"REG_c") \n\t"
+
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ "xor %%"REG_a", %%"REG_a" \n\t" // i
+ "mov %5, %%"REG_c" \n\t" // src
+ "mov %6, %%"REG_D" \n\t" // buf2
+ PREFETCH" (%%"REG_c") \n\t"
+ PREFETCH" 32(%%"REG_c") \n\t"
+ PREFETCH" 64(%%"REG_c") \n\t"
+
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+ CALL_MMXEXT_FILTER_CODE
+
+#if defined(PIC)
+ "mov %7, %%"REG_b" \n\t"
+#if ARCH_X86_64
+ "mov %8, %%"REG_a" \n\t"
+ "mov %%"REG_a", -8(%%rsp) \n\t"
+#endif
+#else
+#if ARCH_X86_64
+ "mov %7, %%"REG_a" \n\t"
+ "mov %%"REG_a", -8(%%rsp) \n\t"
+#endif
+#endif
+ :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
+ "m" (mmxextFilterCode), "m" (src2), "m"(dst2)
+#if defined(PIC)
+ ,"m" (ebxsave)
+#endif
+#if ARCH_X86_64
+ ,"m"(retsave)
+#endif
+ : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
+#if !defined(PIC)
+ ,"%"REG_b
+#endif
+ );
+
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+ dst1[i] = src1[srcW-1]*128;
+ dst2[i] = src2[srcW-1]*128;
+ }
+}
+#endif //HAVE_INLINE_ASM
diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
index 6f5677e..af9afca 100644
--- a/libswscale/x86/input.asm
+++ b/libswscale/x86/input.asm
@@ -4,20 +4,20 @@
;* into YUV planes also.
;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -35,33 +35,59 @@ SECTION_RODATA
%define GV 0xD0E3
%define BV 0xF6E4
-rgb_Yrnd: times 4 dd 0x84000 ; 16.5 << 15
-rgb_UVrnd: times 4 dd 0x404000 ; 128.5 << 15
-bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY
-bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY
-rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY
-rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY
-bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU
-bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU
-rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU
-rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU
-bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV
-bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV
-rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
-rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV
-
-rgba_Ycoeff_rb: times 4 dw RY, BY
-rgba_Ycoeff_br: times 4 dw BY, RY
-rgba_Ycoeff_ga: times 4 dw GY, 0
-rgba_Ycoeff_ag: times 4 dw 0, GY
-rgba_Ucoeff_rb: times 4 dw RU, BU
-rgba_Ucoeff_br: times 4 dw BU, RU
-rgba_Ucoeff_ga: times 4 dw GU, 0
-rgba_Ucoeff_ag: times 4 dw 0, GU
-rgba_Vcoeff_rb: times 4 dw RV, BV
-rgba_Vcoeff_br: times 4 dw BV, RV
-rgba_Vcoeff_ga: times 4 dw GV, 0
-rgba_Vcoeff_ag: times 4 dw 0, GV
+rgb_Yrnd: times 4 dd 0x80100 ; (16 << 15) + (1 << 8): +16 bias plus rounding for the >> 9
+rgb_UVrnd: times 4 dd 0x400100 ; (128 << 15) + (1 << 8)
+%define bgr_Ycoeff_12x4 16*4 + 16* 0 + tableq
+%define bgr_Ycoeff_3x56 16*4 + 16* 1 + tableq
+%define rgb_Ycoeff_12x4 16*4 + 16* 2 + tableq
+%define rgb_Ycoeff_3x56 16*4 + 16* 3 + tableq
+%define bgr_Ucoeff_12x4 16*4 + 16* 4 + tableq
+%define bgr_Ucoeff_3x56 16*4 + 16* 5 + tableq
+%define rgb_Ucoeff_12x4 16*4 + 16* 6 + tableq
+%define rgb_Ucoeff_3x56 16*4 + 16* 7 + tableq
+%define bgr_Vcoeff_12x4 16*4 + 16* 8 + tableq
+%define bgr_Vcoeff_3x56 16*4 + 16* 9 + tableq
+%define rgb_Vcoeff_12x4 16*4 + 16*10 + tableq
+%define rgb_Vcoeff_3x56 16*4 + 16*11 + tableq
+
+%define rgba_Ycoeff_rb 16*4 + 16*12 + tableq
+%define rgba_Ycoeff_br 16*4 + 16*13 + tableq
+%define rgba_Ycoeff_ga 16*4 + 16*14 + tableq
+%define rgba_Ycoeff_ag 16*4 + 16*15 + tableq
+%define rgba_Ucoeff_rb 16*4 + 16*16 + tableq
+%define rgba_Ucoeff_br 16*4 + 16*17 + tableq
+%define rgba_Ucoeff_ga 16*4 + 16*18 + tableq
+%define rgba_Ucoeff_ag 16*4 + 16*19 + tableq
+%define rgba_Vcoeff_rb 16*4 + 16*20 + tableq
+%define rgba_Vcoeff_br 16*4 + 16*21 + tableq
+%define rgba_Vcoeff_ga 16*4 + 16*22 + tableq
+%define rgba_Vcoeff_ag 16*4 + 16*23 + tableq
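+
+; the coefficient vectors live in a per-context table (tableq, presumably
+; c->input_rgb2yuv_table as filled by fill_rgb2yuv_table()) rather than in
+; static RODATA, so they can follow sws_setColorspaceDetails(); the
+; original constants are kept commented out below for reference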
+
+; bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY
+; bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY
+; rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY
+; rgb_Ycoeff_3x56: times 2 dw BY, 0, GY, BY
+; bgr_Ucoeff_12x4: times 2 dw BU, GU, 0, BU
+; bgr_Ucoeff_3x56: times 2 dw RU, 0, GU, RU
+; rgb_Ucoeff_12x4: times 2 dw RU, GU, 0, RU
+; rgb_Ucoeff_3x56: times 2 dw BU, 0, GU, BU
+; bgr_Vcoeff_12x4: times 2 dw BV, GV, 0, BV
+; bgr_Vcoeff_3x56: times 2 dw RV, 0, GV, RV
+; rgb_Vcoeff_12x4: times 2 dw RV, GV, 0, RV
+; rgb_Vcoeff_3x56: times 2 dw BV, 0, GV, BV
+
+; rgba_Ycoeff_rb: times 4 dw RY, BY
+; rgba_Ycoeff_br: times 4 dw BY, RY
+; rgba_Ycoeff_ga: times 4 dw GY, 0
+; rgba_Ycoeff_ag: times 4 dw 0, GY
+; rgba_Ucoeff_rb: times 4 dw RU, BU
+; rgba_Ucoeff_br: times 4 dw BU, RU
+; rgba_Ucoeff_ga: times 4 dw GU, 0
+; rgba_Ucoeff_ag: times 4 dw 0, GU
+; rgba_Vcoeff_rb: times 4 dw RV, BV
+; rgba_Vcoeff_br: times 4 dw BV, RV
+; rgba_Vcoeff_ga: times 4 dw GV, 0
+; rgba_Vcoeff_ag: times 4 dw 0, GV
shuf_rgb_12x4: db 0, 0x80, 1, 0x80, 2, 0x80, 3, 0x80, \
6, 0x80, 7, 0x80, 8, 0x80, 9, 0x80
@@ -82,7 +108,7 @@ SECTION .text
; %1 = nr. of XMM registers
; %2 = rgb or bgr
%macro RGB24_TO_Y_FN 2-3
-cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w
+cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
%if mmsize == 8
mova m5, [%2_Ycoeff_12x4]
mova m6, [%2_Ycoeff_3x56]
@@ -114,6 +140,7 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w
%if ARCH_X86_64
movsxd wq, wd
%endif
+ add wq, wq
add dstq, wq
neg wq
%if notcpuflag(ssse3)
@@ -157,12 +184,11 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w
paddd m2, m3 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[4-7]
paddd m0, m4 ; += rgb_Yrnd, i.e. (dword) { Y[0-3] }
paddd m2, m4 ; += rgb_Yrnd, i.e. (dword) { Y[4-7] }
- psrad m0, 15
- psrad m2, 15
+ psrad m0, 9
+ psrad m2, 9
packssdw m0, m2 ; (word) { Y[0-7] }
- packuswb m0, m0 ; (byte) { Y[0-7] }
- movh [dstq+wq], m0
- add wq, mmsize / 2
+ mova [dstq+wq], m0
+ add wq, mmsize
jl .loop
REP_RET
%endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8
@@ -171,7 +197,7 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w
; %1 = nr. of XMM registers
; %2 = rgb or bgr
%macro RGB24_TO_UV_FN 2-3
-cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w
+cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
%if ARCH_X86_64
mova m8, [%2_Ucoeff_12x4]
mova m9, [%2_Ucoeff_3x56]
@@ -202,10 +228,11 @@ cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w
%endif ; x86-32/64
%endif ; cpuflag(ssse3)
%if ARCH_X86_64
- movsxd wq, dword r4m
+ movsxd wq, dword r5m
%else ; x86-32
- mov wq, r4m
+ mov wq, r5m
%endif
+ add wq, wq
add dstUq, wq
add dstVq, wq
neg wq
@@ -263,23 +290,20 @@ cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w
paddd m2, m6 ; += rgb_UVrnd, i.e. (dword) { V[0-3] }
paddd m1, m6 ; += rgb_UVrnd, i.e. (dword) { U[4-7] }
paddd m4, m6 ; += rgb_UVrnd, i.e. (dword) { V[4-7] }
- psrad m0, 15
- psrad m2, 15
- psrad m1, 15
- psrad m4, 15
+ psrad m0, 9
+ psrad m2, 9
+ psrad m1, 9
+ psrad m4, 9
packssdw m0, m1 ; (word) { U[0-7] }
packssdw m2, m4 ; (word) { V[0-7] }
%if mmsize == 8
- packuswb m0, m0 ; (byte) { U[0-3] }
- packuswb m2, m2 ; (byte) { V[0-3] }
- movh [dstUq+wq], m0
- movh [dstVq+wq], m2
+ mova [dstUq+wq], m0
+ mova [dstVq+wq], m2
%else ; mmsize == 16
- packuswb m0, m2 ; (byte) { U[0-7], V[0-7] }
- movh [dstUq+wq], m0
- movhps [dstVq+wq], m0
+ mova [dstUq+wq], m0
+ mova [dstVq+wq], m2
%endif ; mmsize == 8/16
- add wq, mmsize / 2
+ add wq, mmsize
jl .loop
REP_RET
%endif ; ARCH_X86_64 && %0 == 3
@@ -305,13 +329,15 @@ RGB24_FUNCS 10, 12
INIT_XMM ssse3
RGB24_FUNCS 11, 13
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
RGB24_FUNCS 11, 13
+%endif
; %1 = nr. of XMM registers
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
%macro RGB32_TO_Y_FN 5-6
-cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
+cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, table
mova m5, [rgba_Ycoeff_%2%4]
mova m6, [rgba_Ycoeff_%3%5]
%if %0 == 6
@@ -321,7 +347,9 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
%if ARCH_X86_64
movsxd wq, wd
%endif
- lea srcq, [srcq+wq*4]
+ add wq, wq
+ sub wq, mmsize - 1
+ lea srcq, [srcq+wq*2]
add dstq, wq
neg wq
mova m4, [rgb_Yrnd]
@@ -329,8 +357,8 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
psrlw m7, 8 ; (word) { 0x00ff } x4
.loop:
; FIXME check alignment and use mova
- movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
- movu m2, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
+ movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
+ movu m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7]
pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3]
pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3]
@@ -340,13 +368,29 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
paddd m2, m4 ; += rgb_Yrnd
paddd m0, m1 ; (dword) { Y[0-3] }
paddd m2, m3 ; (dword) { Y[4-7] }
- psrad m0, 15
- psrad m2, 15
+ psrad m0, 9
+ psrad m2, 9
packssdw m0, m2 ; (word) { Y[0-7] }
- packuswb m0, m0 ; (byte) { Y[0-7] }
- movh [dstq+wq], m0
- add wq, mmsize / 2
+ mova [dstq+wq], m0
+ add wq, mmsize
jl .loop
+ sub wq, mmsize - 1
+ jz .end
+ add srcq, 2*mmsize - 2
+ add dstq, mmsize - 1
+.loop2:
+ movd m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
+ DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7]
+ pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3]
+ pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3]
+ paddd m0, m4 ; += rgb_Yrnd
+ paddd m0, m1 ; (dword) { Y[0-3] }
+ psrad m0, 9
+ packssdw m0, m0 ; (word) { Y[0-7] }
+ movd [dstq+wq], m0
+ add wq, 2
+ jl .loop2
+.end:
REP_RET
%endif ; %0 == 3
%endmacro
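; The new scalar tail (.loop2) handles widths that are not a multiple of the
; vector size: "sub wq, mmsize - 1" biases the counter so the vector loop
; stops before a full-width store could run past the end, and the leftover
; pixels are converted one at a time (movd load, 2-byte counter step).
; Roughly, in C (a sketch; the names are illustrative):
;
;     for (i = 0; i + PIX_PER_VEC <= w; i += PIX_PER_VEC) vec_kernel(i);
;     for (; i < w; i++)                                   scalar_kernel(i);
;
; The RGB32_TO_UV_FN macro below repeats the same pattern for U and V.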
@@ -354,7 +398,7 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
; %1 = nr. of XMM registers
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
%macro RGB32_TO_UV_FN 5-6
-cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w
+cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
%if ARCH_X86_64
mova m8, [rgba_Ucoeff_%2%4]
mova m9, [rgba_Ucoeff_%3%5]
@@ -375,21 +419,23 @@ cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w
%else ; ARCH_X86_64 && %0 == 6
.body:
%if ARCH_X86_64
- movsxd wq, dword r4m
+ movsxd wq, dword r5m
%else ; x86-32
- mov wq, r4m
+ mov wq, r5m
%endif
+ add wq, wq
+ sub wq, mmsize - 1
add dstUq, wq
add dstVq, wq
- lea srcq, [srcq+wq*4]
+ lea srcq, [srcq+wq*2]
neg wq
pcmpeqb m7, m7
psrlw m7, 8 ; (word) { 0x00ff } x4
mova m6, [rgb_UVrnd]
.loop:
; FIXME check alignment and use mova
- movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
- movu m4, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
+ movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
+ movu m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7]
pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3]
pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3]
@@ -405,26 +451,48 @@ cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w
pmaddwd m4, coeffU2 ; (dword) { Gx*GU }[4-7]
paddd m3, m6 ; += rgb_UVrnd
paddd m5, m6 ; += rgb_UVrnd
- psrad m0, 15
+ psrad m0, 9
paddd m1, m3 ; (dword) { V[4-7] }
paddd m4, m5 ; (dword) { U[4-7] }
- psrad m2, 15
- psrad m4, 15
- psrad m1, 15
+ psrad m2, 9
+ psrad m4, 9
+ psrad m1, 9
packssdw m0, m4 ; (word) { U[0-7] }
packssdw m2, m1 ; (word) { V[0-7] }
%if mmsize == 8
- packuswb m0, m0 ; (byte) { U[0-7] }
- packuswb m2, m2 ; (byte) { V[0-7] }
- movh [dstUq+wq], m0
- movh [dstVq+wq], m2
+ mova [dstUq+wq], m0
+ mova [dstVq+wq], m2
%else ; mmsize == 16
- packuswb m0, m2 ; (byte) { U[0-7], V[0-7] }
- movh [dstUq+wq], m0
- movhps [dstVq+wq], m0
+ mova [dstUq+wq], m0
+ mova [dstVq+wq], m2
%endif ; mmsize == 8/16
- add wq, mmsize / 2
+ add wq, mmsize
jl .loop
+ sub wq, mmsize - 1
+ jz .end
+ add srcq, 2*mmsize - 2

+ add dstUq, mmsize - 1
+ add dstVq, mmsize - 1
+.loop2:
+ movd m0, [srcq+wq*2] ; (byte) { Bx, Gx, Rx, xx }[0-3]
+ DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7]
+ pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3]
+ pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3]
+ pmaddwd m1, coeffU1 ; (dword) { Bx*BU + Rx*RU }[0-3]
+ pmaddwd m0, coeffU2 ; (dword) { Gx*GU }[0-3]
+ paddd m3, m6 ; += rgb_UVrnd
+ paddd m1, m6 ; += rgb_UVrnd
+ paddd m2, m3 ; (dword) { V[0-3] }
+ paddd m0, m1 ; (dword) { U[0-3] }
+ psrad m0, 9
+ psrad m2, 9
+ packssdw m0, m0 ; (word) { U[0-7] }
+ packssdw m2, m2 ; (word) { V[0-7] }
+ movd [dstUq+wq], m0
+ movd [dstVq+wq], m2
+ add wq, 2
+ jl .loop2
+.end:
REP_RET
%endif ; ARCH_X86_64 && %0 == 3
%endmacro
@@ -451,8 +519,10 @@ RGB32_FUNCS 0, 0
INIT_XMM sse2
RGB32_FUNCS 8, 12
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
RGB32_FUNCS 8, 12
+%endif
;-----------------------------------------------------------------------------
; YUYV/UYVY/NV12/NV21 packed pixel shuffling.
@@ -489,7 +559,7 @@ RGB32_FUNCS 8, 12
; will be the same (i.e. YUYV+AVX), and thus we don't need to
; split the loop in an aligned and unaligned case
%macro YUYV_TO_Y_FN 2-3
-cglobal %2ToY, 3, 3, %1, dst, src, w
+cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
%if ARCH_X86_64
movsxd wq, wd
%endif
@@ -559,11 +629,11 @@ cglobal %2ToY, 3, 3, %1, dst, src, w
; will be the same (i.e. UYVY+AVX), and thus we don't need to
; split the loop in an aligned and unaligned case
%macro YUYV_TO_UV_FN 2-3
-cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
+cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
%if ARCH_X86_64
- movsxd wq, dword r4m
+ movsxd wq, dword r5m
%else ; x86-32
- mov wq, r4m
+ mov wq, r5m
%endif
add dstUq, wq
add dstVq, wq
@@ -593,8 +663,8 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
.loop_%1:
mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... }
mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... }
- pand m2, m0, m4 ; (word) { U0, U1, ..., U7 }
- pand m3, m1, m4 ; (word) { U8, U9, ..., U15 }
+ pand m2, m0, m5 ; (word) { U0, U1, ..., U7 }
+ pand m3, m1, m5 ; (word) { U8, U9, ..., U15 }
psrlw m0, 8 ; (word) { V0, V1, ..., V7 }
psrlw m1, 8 ; (word) { V8, V9, ..., V15 }
packuswb m2, m3 ; (byte) { U0, ..., U15 }
@@ -614,11 +684,11 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
; %1 = nr. of XMM registers
; %2 = nv12 or nv21
%macro NVXX_TO_UV_FN 2
-cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
+cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
%if ARCH_X86_64
- movsxd wq, dword r4m
+ movsxd wq, dword r5m
%else ; x86-32
- mov wq, r4m
+ mov wq, r5m
%endif
add dstUq, wq
add dstVq, wq
@@ -626,8 +696,8 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
test srcq, 15
%endif
lea srcq, [srcq+wq*2]
- pcmpeqb m4, m4 ; (byte) { 0xff } x 16
- psrlw m4, 8 ; (word) { 0x00ff } x 8
+ pcmpeqb m5, m5 ; (byte) { 0xff } x 16
+ psrlw m5, 8 ; (word) { 0x00ff } x 8
%if mmsize == 16
jnz .loop_u_start
neg wq
@@ -659,6 +729,7 @@ YUYV_TO_UV_FN 3, uyvy
NVXX_TO_UV_FN 5, nv12
NVXX_TO_UV_FN 5, nv21
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but
; that's not faster in practice
@@ -666,3 +737,4 @@ YUYV_TO_UV_FN 3, yuyv
YUYV_TO_UV_FN 3, uyvy, 1
NVXX_TO_UV_FN 5, nv12
NVXX_TO_UV_FN 5, nv21
+%endif
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index e1ceded..9ea4af9 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -3,20 +3,20 @@
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;* Kieran Kunhya <kieran@kunhya.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -264,10 +264,12 @@ yuv2planeX_fn 9, 7, 5
yuv2planeX_fn 10, 7, 5
yuv2planeX_fn 16, 8, 5
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
yuv2planeX_fn 8, 10, 7
yuv2planeX_fn 9, 7, 5
yuv2planeX_fn 10, 7, 5
+%endif
; %1=output-bpc, %2=alignment (u/a)
%macro yuv2plane1_mainloop 2
@@ -402,8 +404,10 @@ yuv2plane1_fn 16, 6, 3
INIT_XMM sse4
yuv2plane1_fn 16, 5, 3
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
yuv2plane1_fn 8, 5, 5
yuv2plane1_fn 9, 5, 3
yuv2plane1_fn 10, 5, 3
yuv2plane1_fn 16, 5, 3
+%endif
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 9cfe831..b80e869 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -6,20 +6,20 @@
* Written by Nick Kurshev.
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -72,8 +72,14 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL;
DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
+DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
+
+DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2YOffset);
+DECLARE_ALIGNED(8, extern const uint64_t, ff_w1111);
+DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
-#define RGB2YUV_SHIFT 8
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
@@ -125,6 +131,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMXEXT 0
#define COMPILE_TEMPLATE_SSE2 0
+#define COMPILE_TEMPLATE_AVX 0
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3dnow
#include "rgb2rgb_template.c"
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 5d34c21..3899d0a 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -7,20 +7,20 @@
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
* lot of big-endian byte order fixes by Alex Beregszaszi
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -131,14 +131,11 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm4, %%mm3 \n\t" \
"psllq $48, %%mm2 \n\t" \
"psllq $32, %%mm3 \n\t" \
- "pand "MANGLE(mask24hh)", %%mm2\n\t" \
- "pand "MANGLE(mask24hhh)", %%mm3\n\t" \
"por %%mm2, %%mm0 \n\t" \
"psrlq $16, %%mm1 \n\t" \
"psrlq $32, %%mm4 \n\t" \
"psllq $16, %%mm5 \n\t" \
"por %%mm3, %%mm1 \n\t" \
- "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
"por %%mm5, %%mm4 \n\t" \
\
MOVNTQ" %%mm0, (%0) \n\t" \
@@ -168,6 +165,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"movq %%mm5, %%mm7 \n\t"
STORE_BGR24_MMX
:: "r"(dest), "r"(s)
+ NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory");
dest += 24;
s += 32;
@@ -717,27 +715,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
}
}
-/*
- I use less accurate approximation here by simply left-shifting the input
- value and filling the low order bits with zeroes. This method improves PNG
- compression but this scheme cannot reproduce white exactly, since it does
- not generate an all-ones maximum value; the net effect is to darken the
- image slightly.
-
- The better method should be "left bit replication":
-
- 4 3 2 1 0
- ---------
- 1 1 0 1 1
-
- 7 6 5 4 3 2 1 0
- ----------------
- 1 1 0 1 1 1 1 0
- |=======| |===|
- | leftmost bits repeated to fill open bits
- |
- original bits
-*/
static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint16_t *end;
@@ -756,9 +733,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -786,9 +764,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -809,6 +788,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
:"=m"(*d)
:"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
+ NAMED_CONSTRAINTS_ADD(mul15_mid,mul15_hi)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -825,6 +805,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
:: "r"(d), "m"(*s)
+ NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory");
d += 24;
s += 8;
@@ -834,9 +815,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
}
}
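/* The 5->8 bit expansion above now uses left bit replication instead of a
 * plain shift, so a full-scale component maps to a full-scale byte
 * (a worked example of the scalar tail above):
 *
 *   x8 = (x5 << 3) | (x5 >> 2);   // 0x1F -> 0xFF, 0x10 -> 0x84
 *
 * The MMX path reaches the same value with one pmulhw per component:
 * ((x << 5) * 0x4200) >> 16 == x*8 + x/4, i.e. mul15_mid performs the
 * replication; mul15_hi does the same for the component already in the
 * high bits, and mul16_mid covers the 6-bit green of RGB565 below.
 */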
@@ -858,9 +839,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -888,9 +871,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -910,6 +895,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"por %%mm5, %%mm3 \n\t"
:"=m"(*d)
:"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
+ NAMED_CONSTRAINTS_ADD(mul15_mid,mul16_mid,mul15_hi)
:"memory");
/* borrowed 32 to 24 */
__asm__ volatile(
@@ -926,6 +912,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
:: "r"(d), "m"(*s)
+ NAMED_CONSTRAINTS_ADD(mask24l,mask24h)
:"memory");
d += 24;
s += 8;
@@ -935,9 +922,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
}
}
@@ -980,11 +967,13 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw %5, %%mm0 \n\t"
+ "pmulhw %5, %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
- ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
+ ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
+ NAMED_CONSTRAINTS_ADD(mul15_hi)
:"memory");
d += 16;
s += 4;
@@ -994,9 +983,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
*d++ = 255;
}
}
@@ -1021,11 +1010,14 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw %5, %%mm0 \n\t"
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
- ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
+ ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
+ NAMED_CONSTRAINTS_ADD(mul16_mid,mul15_hi)
:"memory");
d += 16;
s += 4;
@@ -1035,9 +1027,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
*d++ = 255;
}
}
@@ -1150,6 +1142,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"2: \n\t"
: "+a" (mmx_size)
: "r" (src-mmx_size), "r"(dst-mmx_size)
+ NAMED_CONSTRAINTS_ADD(mask24r,mask24g,mask24b)
);
__asm__ volatile(SFENCE:::"memory");
@@ -1485,6 +1478,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
:: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
"r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
"g" (-mmxSize)
+ NAMED_CONSTRAINTS_ADD(mmx_ff)
: "%"REG_a
);
@@ -1629,10 +1623,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
* others are ignored in the C version.
* FIXME: Write HQ version.
*/
+#if HAVE_7REGS
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
int width, int height,
- int lumStride, int chromStride, int srcStride)
+ int lumStride, int chromStride, int srcStride,
+ int32_t *rgb2yuv)
{
+#define BGR2Y_IDX "16*4+16*32"
+#define BGR2U_IDX "16*4+16*33"
+#define BGR2V_IDX "16*4+16*34"
int y;
const x86_reg chromWidth= width>>1;
for (y=0; y<height-2; y+=2) {
@@ -1640,7 +1639,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
for (i=0; i<2; i++) {
__asm__ volatile(
"mov %2, %%"REG_a" \n\t"
- "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
+ "movq "BGR2Y_IDX"(%3), %%mm6 \n\t"
"movq "MANGLE(ff_w1111)", %%mm5 \n\t"
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
@@ -1659,12 +1658,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"pmaddwd %%mm6, %%mm1 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
"pmaddwd %%mm6, %%mm3 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"pmaddwd %%mm5, %%mm0 \n\t"
@@ -1684,12 +1681,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"pmaddwd %%mm6, %%mm1 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
"pmaddwd %%mm6, %%mm3 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm4 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm1, %%mm4 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t"
@@ -1704,7 +1699,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
- : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
+ : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv)
+ NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2YOffset)
: "%"REG_a, "%"REG_d
);
ydst += lumStride;
@@ -1714,7 +1710,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
__asm__ volatile(
"mov %4, %%"REG_a" \n\t"
"movq "MANGLE(ff_w1111)", %%mm5 \n\t"
- "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
+ "movq "BGR2U_IDX"(%5), %%mm6 \n\t"
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t"
"add %%"REG_d", %%"REG_d" \n\t"
@@ -1763,19 +1759,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm2 \n\t"
#endif
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm3 \n\t"
"pmaddwd %%mm0, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm0 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm0 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm2, %%mm0 \n\t"
"packssdw %%mm3, %%mm1 \n\t"
"pmaddwd %%mm5, %%mm0 \n\t"
@@ -1825,19 +1819,17 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm2 \n\t"
#endif
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t"
- "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm1 \n\t"
+ "movq "BGR2V_IDX"(%5), %%mm3 \n\t"
"pmaddwd %%mm4, %%mm1 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm4 \n\t"
"pmaddwd %%mm6, %%mm2 \n\t"
-#ifndef FAST_BGR2YV12
"psrad $8, %%mm4 \n\t"
"psrad $8, %%mm1 \n\t"
"psrad $8, %%mm2 \n\t"
"psrad $8, %%mm3 \n\t"
-#endif
"packssdw %%mm2, %%mm4 \n\t"
"packssdw %%mm3, %%mm1 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t"
@@ -1856,7 +1848,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"movd %%mm0, (%3, %%"REG_a") \n\t"
"add $4, %%"REG_a" \n\t"
" js 1b \n\t"
- : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
+ : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv)
+ NAMED_CONSTRAINTS_ADD(ff_w1111,ff_bgr2UVOffset)
: "%"REG_a, "%"REG_d
);
@@ -1869,8 +1862,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
SFENCE" \n\t"
:::"memory");
- rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride);
+ ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride, rgb2yuv);
}
+#endif /* HAVE_7REGS */
#endif /* !COMPILE_TEMPLATE_SSE2 */
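/* rgb24toyv12 above now reads its coefficients from the per-context rgb2yuv
 * table rather than the compile-time ff_bgr2[YUV]Coeff constants, so the
 * conversion matrix can vary at runtime. The BGR2?_IDX strings are byte
 * offsets into that table; a rough C equivalent of the movq loads (a
 * sketch; the packed-int16 layout at those offsets is an assumption):
 *
 *   const uint8_t *tab = (const uint8_t *)rgb2yuv;
 *   uint64_t y_coeff = AV_RN64(tab + 16*4 + 16*32);   // BGR2Y_IDX
 *   uint64_t u_coeff = AV_RN64(tab + 16*4 + 16*33);   // BGR2U_IDX
 *   uint64_t v_coeff = AV_RN64(tab + 16*4 + 16*34);   // BGR2V_IDX
 */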
#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
@@ -1945,9 +1939,13 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
}
#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src, const uint8_t *unused, int w,
+ const uint8_t *unused,
+ const uint8_t *src1,
+ const uint8_t *src2,
+ int w,
uint32_t *unused2);
static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
int width, int height, int srcStride,
@@ -1956,7 +1954,7 @@ static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t
int h;
for (h = 0; h < height; h++) {
- RENAME(ff_nv12ToUV)(dst1, dst2, src, NULL, width, NULL);
+ RENAME(ff_nv12ToUV)(dst1, dst2, NULL, src, NULL, width, NULL);
src += srcStride;
dst1 += dst1Stride;
dst2 += dst2Stride;
@@ -1968,6 +1966,7 @@ static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t
);
}
#endif /* !COMPILE_TEMPLATE_AMD3DNOW */
+#endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */
#if !COMPILE_TEMPLATE_SSE2
#if !COMPILE_TEMPLATE_AMD3DNOW
@@ -2187,6 +2186,44 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count
}
}
+static void RENAME(extract_odd)(const uint8_t *src, uint8_t *dst, x86_reg count)
+{
+ src++;
+ dst += count;
+ src += 2*count;
+ count = -count;
+
+ if(count < -16) {
+ count += 16;
+ __asm__ volatile(
+ "pcmpeqw %%mm7, %%mm7 \n\t"
+ "psrlw $8, %%mm7 \n\t"
+ "1: \n\t"
+ "movq -32(%1, %0, 2), %%mm0 \n\t"
+ "movq -24(%1, %0, 2), %%mm1 \n\t"
+ "movq -16(%1, %0, 2), %%mm2 \n\t"
+ "movq -8(%1, %0, 2), %%mm3 \n\t"
+ "pand %%mm7, %%mm0 \n\t"
+ "pand %%mm7, %%mm1 \n\t"
+ "pand %%mm7, %%mm2 \n\t"
+ "pand %%mm7, %%mm3 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "packuswb %%mm3, %%mm2 \n\t"
+ MOVNTQ" %%mm0,-16(%2, %0) \n\t"
+ MOVNTQ" %%mm2,- 8(%2, %0) \n\t"
+ "add $16, %0 \n\t"
+ " js 1b \n\t"
+ : "+r"(count)
+ : "r"(src), "r"(dst)
+ );
+ count -= 16;
+ }
+ while(count<0) {
+ dst[count]= src[2*count];
+ count++;
+ }
+}
+
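+/* extract_odd is the odd-byte counterpart of extract_even; a scalar model
+ * of what the MMX loop computes:
+ *
+ *   for (i = 0; i < count; i++)
+ *       dst[i] = src[2*i + 1];
+ *
+ * The UYVY unpackers below now call it instead of extract_even(src + 1, ...).
+ */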
#if !COMPILE_TEMPLATE_AMD3DNOW
static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count)
{
@@ -2397,7 +2434,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth= -((-width)>>1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
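+ /* FF_CEIL_RSHIFT(width, 1) == ceil(width / 2); same value as the old -((-width) >> 1), just clearer */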
for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width);
@@ -2423,7 +2460,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth= -((-width)>>1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width);
@@ -2447,10 +2484,10 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth= -((-width)>>1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y=0; y<height; y++) {
- RENAME(extract_even)(src+1, ydst, width);
+ RENAME(extract_odd)(src, ydst, width);
if(y&1) {
RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
udst+= chromStride;
@@ -2473,10 +2510,10 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
int lumStride, int chromStride, int srcStride)
{
int y;
- const int chromWidth= -((-width)>>1);
+ const int chromWidth = FF_CEIL_RSHIFT(width, 1);
for (y=0; y<height; y++) {
- RENAME(extract_even)(src+1, ydst, width);
+ RENAME(extract_odd)(src, ydst, width);
RENAME(extract_even2)(src, udst, vdst, chromWidth);
src += srcStride;
@@ -2529,7 +2566,9 @@ static av_cold void RENAME(rgb2rgb_init)(void)
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
planar2x = RENAME(planar2x);
#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */
- rgb24toyv12 = RENAME(rgb24toyv12);
+#if HAVE_7REGS
+ ff_rgb24toyv12 = RENAME(rgb24toyv12);
+#endif /* HAVE_7REGS */
yuyvtoyuv420 = RENAME(yuyvtoyuv420);
uyvytoyuv420 = RENAME(uyvytoyuv420);
@@ -2538,7 +2577,9 @@ static av_cold void RENAME(rgb2rgb_init)(void)
#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX
interleaveBytes = RENAME(interleaveBytes);
#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */
+#if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL
#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_YASM
deinterleaveBytes = RENAME(deinterleaveBytes);
#endif
+#endif
}
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index 440a27b..7af92f7 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -2,20 +2,20 @@
;* x86-optimized horizontal line scaling functions
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -407,11 +407,15 @@ SCALE_FUNC %1, %2, X, X8, 7, %3
SCALE_FUNCS 8, 15, %1
SCALE_FUNCS 9, 15, %2
SCALE_FUNCS 10, 15, %2
+SCALE_FUNCS 12, 15, %2
+SCALE_FUNCS 14, 15, %2
SCALE_FUNCS 16, 15, %3
%endif ; !sse4
SCALE_FUNCS 8, 19, %1
SCALE_FUNCS 9, 19, %2
SCALE_FUNCS 10, 19, %2
+SCALE_FUNCS 12, 19, %2
+SCALE_FUNCS 14, 19, %2
SCALE_FUNCS 16, 19, %3
%endmacro
@@ -420,7 +424,7 @@ INIT_MMX mmx
SCALE_FUNCS2 0, 0, 0
%endif
INIT_XMM sse2
-SCALE_FUNCS2 6, 7, 8
+SCALE_FUNCS2 7, 6, 8
INIT_XMM ssse3
SCALE_FUNCS2 6, 6, 8
INIT_XMM sse4
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index f310a75..c4c0e28 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -23,6 +23,7 @@
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
@@ -57,19 +58,11 @@ DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;
-#ifdef FAST_BGR2YV12
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
-#else
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
-#endif /* FAST_BGR2YV12 */
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
+
//MMX versions
#if HAVE_MMX_INLINE
#undef RENAME
@@ -117,9 +110,9 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1];
if (dstY < dstH - 2) {
- const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
- const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+ const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+ const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
int i;
if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
@@ -186,7 +179,7 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
*(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]=
- ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
+ ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
*(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
alpMmxFilter[4*i+2]=
@@ -197,12 +190,85 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
*(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]=
- ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
+ ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001U;
}
}
}
}
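/* Note on the 0x10001U multipliers above: multiplying a 16-bit value by
   0x10001 replicates it into both halves of a 32-bit word; the literal is
   now unsigned because 0xFFFF * 0x10001 overflows a signed int. */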
+#if HAVE_MMXEXT
+static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
+ const int16_t **src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset)
+{
+ if (((uintptr_t)dest) & 15) {
+ yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
+ return;
+ }
+ if (offset) {
+ __asm__ volatile("movq (%0), %%xmm3\n\t"
+ "movdqa %%xmm3, %%xmm4\n\t"
+ "psrlq $24, %%xmm3\n\t"
+ "psllq $40, %%xmm4\n\t"
+ "por %%xmm4, %%xmm3\n\t"
+ :: "r"(dither)
+ );
+ } else {
+ __asm__ volatile("movq (%0), %%xmm3\n\t"
+ :: "r"(dither)
+ );
+ }
+ filterSize--;
+ __asm__ volatile(
+ "pxor %%xmm0, %%xmm0\n\t"
+ "punpcklbw %%xmm0, %%xmm3\n\t"
+ "movd %0, %%xmm1\n\t"
+ "punpcklwd %%xmm1, %%xmm1\n\t"
+ "punpckldq %%xmm1, %%xmm1\n\t"
+ "punpcklqdq %%xmm1, %%xmm1\n\t"
+ "psllw $3, %%xmm1\n\t"
+ "paddw %%xmm1, %%xmm3\n\t"
+ "psraw $4, %%xmm3\n\t"
+ ::"m"(filterSize)
+ );
+ __asm__ volatile(
+ "movdqa %%xmm3, %%xmm4\n\t"
+ "movdqa %%xmm3, %%xmm7\n\t"
+ "movl %3, %%ecx\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ ".p2align 4 \n\t" /* FIXME Unroll? */\
+ "1: \n\t"\
+ "movddup 8(%%"REG_d"), %%xmm0 \n\t" /* filterCoeff */\
+ "movdqa (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\
+ "movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\
+ "add $16, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ "pmulhw %%xmm0, %%xmm2 \n\t"\
+ "pmulhw %%xmm0, %%xmm5 \n\t"\
+ "paddw %%xmm2, %%xmm3 \n\t"\
+ "paddw %%xmm5, %%xmm4 \n\t"\
+ " jnz 1b \n\t"\
+ "psraw $3, %%xmm3 \n\t"\
+ "psraw $3, %%xmm4 \n\t"\
+ "packuswb %%xmm4, %%xmm3 \n\t"
+ "movntdq %%xmm3, (%1, %%"REG_c")\n\t"
+ "add $16, %%"REG_c" \n\t"\
+ "cmp %2, %%"REG_c" \n\t"\
+ "movdqa %%xmm7, %%xmm3\n\t"
+ "movdqa %%xmm7, %%xmm4\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "jb 1b \n\t"\
+ :: "g" (filter),
+ "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
+ : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm7",)
+ "%"REG_d, "%"REG_S, "%"REG_c
+ );
+}
+#endif
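+/* A note on the dither setup above: when offset is non-zero the 8-byte
+ * dither pattern is rotated right by 3 bytes (psrlq $24 / psllq $40 / por),
+ * i.e. roughly, as a C model of the asm:
+ *
+ *   uint64_t d = AV_RL64(dither);
+ *   d = (d >> 24) | (d << 40);   // same bytes, shifted phase
+ *
+ * presumably so the dither phase stays consistent with the offset applied
+ * to the destination pointer. */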
+
#endif /* HAVE_INLINE_ASM */
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
@@ -216,10 +282,14 @@ void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
SCALE_FUNC(filter_n, 8, 15, opt); \
SCALE_FUNC(filter_n, 9, 15, opt); \
SCALE_FUNC(filter_n, 10, 15, opt); \
+ SCALE_FUNC(filter_n, 12, 15, opt); \
+ SCALE_FUNC(filter_n, 14, 15, opt); \
SCALE_FUNC(filter_n, 16, 15, opt); \
SCALE_FUNC(filter_n, 8, 19, opt); \
SCALE_FUNC(filter_n, 9, 19, opt); \
SCALE_FUNC(filter_n, 10, 19, opt); \
+ SCALE_FUNC(filter_n, 12, 19, opt); \
+ SCALE_FUNC(filter_n, 14, 19, opt); \
SCALE_FUNC(filter_n, 16, 19, opt)
#define SCALE_FUNCS_MMX(opt) \
@@ -275,11 +345,14 @@ VSCALE_FUNCS(avx, avx);
#define INPUT_Y_FUNC(fmt, opt) \
void ff_ ## fmt ## ToY_ ## opt(uint8_t *dst, const uint8_t *src, \
+ const uint8_t *unused1, const uint8_t *unused2, \
int w, uint32_t *unused)
#define INPUT_UV_FUNC(fmt, opt) \
void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
- const uint8_t *src, const uint8_t *unused1, \
- int w, uint32_t *unused2)
+ const uint8_t *unused0, \
+ const uint8_t *src1, \
+ const uint8_t *src2, \
+ int w, uint32_t *unused)
#define INPUT_FUNC(fmt, opt) \
INPUT_Y_FUNC(fmt, opt); \
INPUT_UV_FUNC(fmt, opt)
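/* The ToY/ToUV prototypes above grew extra source-pointer slots (src1/src2
   plus unused pads) so two-plane inputs such as NV12 fit the same signature;
   this is also why the asm versions now reserve more registers and read the
   width from r5m instead of r4m. */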
@@ -313,20 +386,31 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
#if HAVE_MMXEXT_INLINE
if (INLINE_MMXEXT(cpu_flags))
sws_init_swscale_mmxext(c);
+ if (cpu_flags & AV_CPU_FLAG_SSE3) {
+ if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
+ c->yuv2planeX = yuv2yuvX_sse3;
+ }
#endif
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
if (c->srcBpc == 8) { \
- hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
ff_hscale8to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 9) { \
- hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
ff_hscale9to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 10) { \
- hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
ff_hscale10to19_ ## filtersize ## _ ## opt1; \
- } else /* c->srcBpc == 16 */ { \
- hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
+ } else if (c->srcBpc == 12) { \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
+ ff_hscale12to19_ ## filtersize ## _ ## opt1; \
+ } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1<15)) { \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
+ ff_hscale14to19_ ## filtersize ## _ ## opt1; \
+ } else { /* c->srcBpc == 16 */ \
+ av_assert0(c->srcBpc == 16);\
+ hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
ff_hscale16to19_ ## filtersize ## _ ## opt1; \
} \
} while (0)
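/* In short: the 15-bit intermediate hscalers now serve destinations up to
 * 14 bpc (previously 10), and 12/14-bit inputs get dedicated entry points.
 * A condensed model of the macro above (a sketch; the arrays are
 * illustrative, not real symbols):
 *
 *   int to15 = c->dstBpc <= 14;
 *   hscalefn = to15 ? hscale_to15[c->srcBpc] : hscale_to19[c->srcBpc];
 */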
@@ -341,14 +425,15 @@ switch(c->dstBpc){ \
case 16: do_16_case; break; \
case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
- default: if (condition_8bit) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
+ default: if (condition_8bit) /*vscalefn = ff_yuv2planeX_8_ ## opt;*/ break; \
}
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
switch(c->dstBpc){ \
case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
- default: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
+ case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
+ default: av_assert0(c->dstBpc>8); \
}
#define case_rgb(x, X, opt) \
case AV_PIX_FMT_ ## X: \
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 1e42ec5..36a606c 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -25,21 +25,101 @@
#undef REAL_MOVNTQ
#undef MOVNTQ
+#undef MOVNTQ2
#undef PREFETCH
-#if COMPILE_TEMPLATE_MMXEXT
-#define PREFETCH "prefetchnta"
-#else
-#define PREFETCH " # nop"
-#endif
#if COMPILE_TEMPLATE_MMXEXT
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
+#define MOVNTQ2 "movntq "
#else
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
+#define MOVNTQ2 "movq "
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
+#if !COMPILE_TEMPLATE_MMXEXT
+static av_always_inline void
+dither_8to16(const uint8_t *srcDither, int rot)
+{
+ if (rot) {
+ __asm__ volatile("pxor %%mm0, %%mm0\n\t"
+ "movq (%0), %%mm3\n\t"
+ "movq %%mm3, %%mm4\n\t"
+ "psrlq $24, %%mm3\n\t"
+ "psllq $40, %%mm4\n\t"
+ "por %%mm4, %%mm3\n\t"
+ "movq %%mm3, %%mm4\n\t"
+ "punpcklbw %%mm0, %%mm3\n\t"
+ "punpckhbw %%mm0, %%mm4\n\t"
+ :: "r"(srcDither)
+ );
+ } else {
+ __asm__ volatile("pxor %%mm0, %%mm0\n\t"
+ "movq (%0), %%mm3\n\t"
+ "movq %%mm3, %%mm4\n\t"
+ "punpcklbw %%mm0, %%mm3\n\t"
+ "punpckhbw %%mm0, %%mm4\n\t"
+ :: "r"(srcDither)
+ );
+ }
+}
+#endif
+
+static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
+ const int16_t **src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset)
+{
+ dither_8to16(dither, offset);
+ filterSize--;
+ __asm__ volatile(
+ "movd %0, %%mm1\n\t"
+ "punpcklwd %%mm1, %%mm1\n\t"
+ "punpckldq %%mm1, %%mm1\n\t"
+ "psllw $3, %%mm1\n\t"
+ "paddw %%mm1, %%mm3\n\t"
+ "paddw %%mm1, %%mm4\n\t"
+ "psraw $4, %%mm3\n\t"
+ "psraw $4, %%mm4\n\t"
+ ::"m"(filterSize)
+ );
+
+ __asm__ volatile(\
+ "movq %%mm3, %%mm6\n\t"
+ "movq %%mm4, %%mm7\n\t"
+ "movl %3, %%ecx\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ ".p2align 4 \n\t" /* FIXME Unroll? */\
+ "1: \n\t"\
+ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
+ "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\
+ "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\
+ "add $16, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ "pmulhw %%mm0, %%mm2 \n\t"\
+ "pmulhw %%mm0, %%mm5 \n\t"\
+ "paddw %%mm2, %%mm3 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ " jnz 1b \n\t"\
+ "psraw $3, %%mm3 \n\t"\
+ "psraw $3, %%mm4 \n\t"\
+ "packuswb %%mm4, %%mm3 \n\t"
+ MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t"
+ "add $8, %%"REG_c" \n\t"\
+ "cmp %2, %%"REG_c" \n\t"\
+ "movq %%mm6, %%mm3\n\t"
+ "movq %%mm7, %%mm4\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "jb 1b \n\t"\
+ :: "g" (filter),
+ "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
+ : "%"REG_d, "%"REG_S, "%"REG_c
+ );
+}
+
#define YSCALEYUV2PACKEDX_UV \
__asm__ volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
@@ -92,6 +172,7 @@
:: "r" (&c->redDither), \
"m" (dummy), "m" (dummy), "m" (dummy),\
"r" (dest), "m" (dstW_reg), "m"(uv_off) \
+ NAMED_CONSTRAINTS_ADD(bF8,bFC) \
: "%"REG_a, "%"REG_d, "%"REG_S \
);
@@ -252,7 +333,7 @@
MOVNTQ( q3, 24(dst, index, 4))\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
@@ -265,7 +346,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX_ACCURATE
@@ -278,13 +359,13 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t"
"packuswb %%mm7, %%mm1 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
+ WRITEBGR32(%4, "%5", %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
YSCALEYUV2PACKEDX_END
} else {
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END
}
}
@@ -298,7 +379,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX
@@ -307,13 +388,13 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t"
"packuswb %%mm7, %%mm1 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+ WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END
} else {
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%4, "%5", %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END
}
}
@@ -342,7 +423,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
MOVNTQ(%%mm1, 8(dst, index, 2))\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITERGB16(dst, dstw, index) REAL_WRITERGB16(dst, dstw, index)
@@ -355,7 +436,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -366,7 +447,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
"paddusb "RED_DITHER"(%0), %%mm5\n\t"
#endif
- WRITERGB16(%4, %5, %%REGa)
+ WRITERGB16(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -379,7 +460,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -390,7 +471,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
"paddusb "RED_DITHER"(%0), %%mm5 \n\t"
#endif
- WRITERGB16(%4, %5, %%REGa)
+ WRITERGB16(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -419,7 +500,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
MOVNTQ(%%mm1, 8(dst, index, 2))\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
@@ -432,7 +513,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -443,7 +524,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
"paddusb "RED_DITHER"(%0), %%mm5\n\t"
#endif
- WRITERGB15(%4, %5, %%REGa)
+ WRITERGB15(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -456,7 +537,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -467,7 +548,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
"paddusb "RED_DITHER"(%0), %%mm5 \n\t"
#endif
- WRITERGB15(%4, %5, %%REGa)
+ WRITERGB15(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -521,7 +602,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
"add $24, "#dst" \n\t"\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITEBGR24MMXEXT(dst, dstw, index) \
@@ -569,7 +650,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
"add $24, "#dst" \n\t"\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#if COMPILE_TEMPLATE_MMXEXT
@@ -580,6 +661,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
#endif
+#if HAVE_6REGS
static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrUSrc,
@@ -589,17 +671,18 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
"add %4, %%"REG_c" \n\t"
- WRITEBGR24(%%REGc, %5, %%REGa)
+ WRITEBGR24(%%REGc, "%5", %%REGa)
:: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
);
}
@@ -613,20 +696,22 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize
"add %4, %%"REG_c" \n\t"
- WRITEBGR24(%%REGc, %5, %%REGa)
+ WRITEBGR24(%%REGc, "%5", %%REGa)
:: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
);
}
+#endif /* HAVE_6REGS */
#define REAL_WRITEYUY2(dst, dstw, index) \
"packuswb %%mm3, %%mm3 \n\t"\
@@ -641,7 +726,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
MOVNTQ(%%mm7, 8(dst, index, 2))\
\
"add $8, "#index" \n\t"\
- "cmp "#dstw", "#index" \n\t"\
+ "cmp "dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
@@ -654,7 +739,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -662,7 +747,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm4 \n\t"
"psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t"
- WRITEYUY2(%4, %5, %%REGa)
+ WRITEYUY2(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -675,7 +760,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -683,7 +768,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm4 \n\t"
"psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t"
- WRITEYUY2(%4, %5, %%REGa)
+ WRITEYUY2(%4, "%5", %%REGa)
YSCALEYUV2PACKEDX_END
}
@@ -784,15 +869,15 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
"psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
"packuswb %%mm7, %%mm1 \n\t"
- WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+ WRITEBGR32(%4, DSTW_OFFSET"(%5)", %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "r" (dest),
"a" (&c->redDither),
"r" (abuf0), "r" (abuf1)
: "%r8"
);
#else
- *(const uint16_t **)(&c->u_temp)=abuf0;
- *(const uint16_t **)(&c->v_temp)=abuf1;
+ c->u_temp=(intptr_t)abuf0;
+ c->v_temp=(intptr_t)abuf1;
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -808,7 +893,7 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
"packuswb %%mm7, %%mm1 \n\t"
"pop %1 \n\t"
"pop %0 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -822,7 +907,7 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5)
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -839,18 +924,18 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
- //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5)
"pxor %%mm7, %%mm7 \n\t"
- WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+ WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
);
}
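
[note] The hunks above replace the hard-coded byte offset 8280 with the DSTW_OFFSET macro and drop the old "the preprocessor can't handle that there" comments: the offset is stringified once and pasted into each asm template via C string-literal concatenation. A minimal sketch of that idiom, with hypothetical names:

#include <stdio.h>

/* Two-level stringification so a numeric macro becomes a string literal
 * that can be concatenated into an asm template at compile time. */
#define TOSTR_(x) #x
#define TOSTR(x)  TOSTR_(x)

#define MY_DSTW 8280                   /* hypothetical byte offset */

int main(void)
{
    /* "8280(%5)" is built by adjacent string-literal concatenation: */
    const char *operand = TOSTR(MY_DSTW) "(%5)";
    puts(operand);                     /* prints: 8280(%5) */
    return 0;
}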
@@ -862,7 +947,6 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
- //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -875,11 +959,12 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8)
);
}
@@ -891,7 +976,6 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
- //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -904,11 +988,12 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8,bFC)
);
}
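
[note] The NAMED_CONSTRAINTS_ADD(...) terms added to the constraint lists above pass constants such as bF8, bFC and the ff_M24* masks into the asm as operands, so the templates no longer have to reference the symbols by absolute address (which breaks under PIC). A minimal sketch of a named "m" operand, with hypothetical names:

#include <stdint.h>

/* Sketch: a named "m" operand lets the template reach a constant through
 * whatever addressing the compiler picks (PIC-safe), instead of hard-coding
 * the symbol name inside the asm string. */
static const uint64_t bF8_demo = 0xF8F8F8F8F8F8F8F8ULL; /* hypothetical */

static void mask_low3bits(uint64_t *v)
{
#if defined(__GNUC__) && defined(__x86_64__)
    __asm__ volatile(
        "movq   %[mask], %%rax \n\t"
        "andq   %%rax, (%0)    \n\t"
        :: "r" (v), [mask] "m" (bF8_demo)
        : "rax", "memory", "cc");
#else
    *v &= bF8_demo;                    /* portable fallback */
#endif
}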
@@ -960,13 +1045,12 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1];
- //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2PACKED(%%REGBP, %5)
- WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+ WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1109,7 +1193,7 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
YSCALEYUV2RGB1_ALPHA(%%REGBP)
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1122,7 +1206,7 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1138,7 +1222,7 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
YSCALEYUV2RGB1_ALPHA(%%REGBP)
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1151,7 +1235,7 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
"pcmpeqd %%mm7, %%mm7 \n\t"
- WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+ WRITEBGR32(%%REGb, DSTW_OFFSET"(%5)", %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1177,11 +1261,12 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5)
"pxor %%mm7, %%mm7 \n\t"
- WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+ WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
);
} else {
const int16_t *ubuf1 = ubuf[1];
@@ -1191,11 +1276,12 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
"push %%"REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5)
"pxor %%mm7, %%mm7 \n\t"
- WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
+ WRITEBGR24(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(ff_M24A,ff_M24C,ff_M24B)
);
}
}
@@ -1222,11 +1308,12 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8)
);
} else {
const int16_t *ubuf1 = ubuf[1];
@@ -1242,11 +1329,12 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB15(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB15(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8)
);
}
}
@@ -1273,11 +1361,12 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8,bFC)
);
} else {
const int16_t *ubuf1 = ubuf[1];
@@ -1293,11 +1382,12 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif
- WRITERGB16(%%REGb, 8280(%5), %%REGBP)
+ WRITERGB16(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither)
+ NAMED_CONSTRAINTS_ADD(bF8,bFC)
);
}
}
@@ -1354,7 +1444,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2PACKED1(%%REGBP, %5)
- WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+ WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1367,7 +1457,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
"mov %4, %%"REG_b" \n\t"
"push %%"REG_BP" \n\t"
YSCALEYUV2PACKED1b(%%REGBP, %5)
- WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
+ WRITEYUY2(%%REGb, DSTW_OFFSET"(%5)", %%REGBP)
"pop %%"REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
@@ -1375,203 +1465,20 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
);
}
}
-
-#if COMPILE_TEMPLATE_MMXEXT
-static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
- int dstWidth, const uint8_t *src,
- int srcW, int xInc)
-{
- int32_t *filterPos = c->hLumFilterPos;
- int16_t *filter = c->hLumFilter;
- void *mmxextFilterCode = c->lumMmxextFilterCode;
- int i;
-#if defined(PIC)
- uint64_t ebxsave;
-#endif
-#if ARCH_X86_64
- uint64_t retsave;
-#endif
-
- __asm__ volatile(
-#if defined(PIC)
- "mov %%"REG_b", %5 \n\t"
-#if ARCH_X86_64
- "mov -8(%%rsp), %%"REG_a" \n\t"
- "mov %%"REG_a", %6 \n\t"
-#endif
-#else
-#if ARCH_X86_64
- "mov -8(%%rsp), %%"REG_a" \n\t"
- "mov %%"REG_a", %5 \n\t"
-#endif
-#endif
- "pxor %%mm7, %%mm7 \n\t"
- "mov %0, %%"REG_c" \n\t"
- "mov %1, %%"REG_D" \n\t"
- "mov %2, %%"REG_d" \n\t"
- "mov %3, %%"REG_b" \n\t"
- "xor %%"REG_a", %%"REG_a" \n\t" // i
- PREFETCH" (%%"REG_c") \n\t"
- PREFETCH" 32(%%"REG_c") \n\t"
- PREFETCH" 64(%%"REG_c") \n\t"
-
-#if ARCH_X86_64
-#define CALL_MMXEXT_FILTER_CODE \
- "movl (%%"REG_b"), %%esi \n\t"\
- "call *%4 \n\t"\
- "movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\
- "add %%"REG_S", %%"REG_c" \n\t"\
- "add %%"REG_a", %%"REG_D" \n\t"\
- "xor %%"REG_a", %%"REG_a" \n\t"\
-
-#else
-#define CALL_MMXEXT_FILTER_CODE \
- "movl (%%"REG_b"), %%esi \n\t"\
- "call *%4 \n\t"\
- "addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\
- "add %%"REG_a", %%"REG_D" \n\t"\
- "xor %%"REG_a", %%"REG_a" \n\t"\
-
-#endif /* ARCH_X86_64 */
-
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
-
-#if defined(PIC)
- "mov %5, %%"REG_b" \n\t"
-#if ARCH_X86_64
- "mov %6, %%"REG_a" \n\t"
- "mov %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#else
-#if ARCH_X86_64
- "mov %5, %%"REG_a" \n\t"
- "mov %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#endif
- :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
- "m" (mmxextFilterCode)
-#if defined(PIC)
- ,"m" (ebxsave)
-#endif
-#if ARCH_X86_64
- ,"m"(retsave)
-#endif
- : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
- ,"%"REG_b
-#endif
- );
-
- for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
- dst[i] = src[srcW-1]*128;
-}
-
-static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
- int dstWidth, const uint8_t *src1,
- const uint8_t *src2, int srcW, int xInc)
-{
- int32_t *filterPos = c->hChrFilterPos;
- int16_t *filter = c->hChrFilter;
- void *mmxextFilterCode = c->chrMmxextFilterCode;
- int i;
-#if defined(PIC)
- DECLARE_ALIGNED(8, uint64_t, ebxsave);
-#endif
-#if ARCH_X86_64
- DECLARE_ALIGNED(8, uint64_t, retsave);
-#endif
-
- __asm__ volatile(
-#if defined(PIC)
- "mov %%"REG_b", %7 \n\t"
-#if ARCH_X86_64
- "mov -8(%%rsp), %%"REG_a" \n\t"
- "mov %%"REG_a", %8 \n\t"
-#endif
-#else
-#if ARCH_X86_64
- "mov -8(%%rsp), %%"REG_a" \n\t"
- "mov %%"REG_a", %7 \n\t"
-#endif
-#endif
- "pxor %%mm7, %%mm7 \n\t"
- "mov %0, %%"REG_c" \n\t"
- "mov %1, %%"REG_D" \n\t"
- "mov %2, %%"REG_d" \n\t"
- "mov %3, %%"REG_b" \n\t"
- "xor %%"REG_a", %%"REG_a" \n\t" // i
- PREFETCH" (%%"REG_c") \n\t"
- PREFETCH" 32(%%"REG_c") \n\t"
- PREFETCH" 64(%%"REG_c") \n\t"
-
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- "xor %%"REG_a", %%"REG_a" \n\t" // i
- "mov %5, %%"REG_c" \n\t" // src
- "mov %6, %%"REG_D" \n\t" // buf2
- PREFETCH" (%%"REG_c") \n\t"
- PREFETCH" 32(%%"REG_c") \n\t"
- PREFETCH" 64(%%"REG_c") \n\t"
-
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
- CALL_MMXEXT_FILTER_CODE
-
-#if defined(PIC)
- "mov %7, %%"REG_b" \n\t"
-#if ARCH_X86_64
- "mov %8, %%"REG_a" \n\t"
- "mov %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#else
-#if ARCH_X86_64
- "mov %7, %%"REG_a" \n\t"
- "mov %%"REG_a", -8(%%rsp) \n\t"
-#endif
-#endif
- :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
- "m" (mmxextFilterCode), "m" (src2), "m"(dst2)
-#if defined(PIC)
- ,"m" (ebxsave)
-#endif
-#if ARCH_X86_64
- ,"m"(retsave)
-#endif
- : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
-#if !defined(PIC)
- ,"%"REG_b
-#endif
- );
-
- for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
- dst1[i] = src1[srcW-1]*128;
- dst2[i] = src2[srcW-1]*128;
- }
-}
-#endif /* COMPILE_TEMPLATE_MMXEXT */
-
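
[note] The two template functions removed above, RENAME(hyscale_fast) and RENAME(hcscale_fast), survive as the shared ff_hyscale_fast_mmxext / ff_hcscale_fast_mmxext implementations selected further down. Their C tail is worth noting: positions advance through the source in 16.16 fixed point, and any destination sample whose source index reaches the last input pixel is filled with that pixel, scaled by 128 into the 15-bit intermediate format consumed by the vertical pass. The loop, extracted into a standalone sketch:

#include <stdint.h>

/* Edge fill after the fast bilinear horizontal scale: replicate the last
 * source pixel for destination positions that would overrun the input. */
static void hscale_edge_fill(int16_t *dst, int dstWidth,
                             const uint8_t *src, int srcW, int xInc)
{
    for (int i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--)
        dst[i] = src[srcW - 1] * 128;  /* 16.16 step; scale by 128 */
}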
static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
{
enum AVPixelFormat dstFormat = c->dstFormat;
- if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
- dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21) {
- if (!(c->flags & SWS_BITEXACT)) {
+ c->use_mmx_vfilter= 0;
+ if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != AV_PIX_FMT_NV12
+ && dstFormat != AV_PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND) {
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X_ar); break;
+#if HAVE_6REGS
case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X_ar); break;
+#endif
case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X_ar); break;
case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X_ar); break;
case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X_ar); break;
@@ -1579,10 +1486,14 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
}
}
} else {
+ c->use_mmx_vfilter= 1;
+ c->yuv2planeX = RENAME(yuv2yuvX );
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case AV_PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
+#if HAVE_6REGS
case AV_PIX_FMT_BGR24: c->yuv2packedX = RENAME(yuv2bgr24_X); break;
+#endif
case AV_PIX_FMT_RGB555: c->yuv2packedX = RENAME(yuv2rgb555_X); break;
case AV_PIX_FMT_RGB565: c->yuv2packedX = RENAME(yuv2rgb565_X); break;
case AV_PIX_FMT_YUYV422: c->yuv2packedX = RENAME(yuv2yuyv422_X); break;
@@ -1590,7 +1501,6 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
}
}
}
- }
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case AV_PIX_FMT_RGB32:
@@ -1619,12 +1529,12 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
}
}
- if (c->srcBpc == 8 && c->dstBpc <= 10) {
+ if (c->srcBpc == 8 && c->dstBpc <= 14) {
// Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
#if COMPILE_TEMPLATE_MMXEXT
if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
- c->hyscale_fast = RENAME(hyscale_fast);
- c->hcscale_fast = RENAME(hcscale_fast);
+ c->hyscale_fast = ff_hyscale_fast_mmxext;
+ c->hcscale_fast = ff_hcscale_fast_mmxext;
} else {
#endif /* COMPILE_TEMPLATE_MMXEXT */
c->hyscale_fast = NULL;
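
[note] Sketch of the resulting horizontal-scaler dispatch, a condensed paraphrase of the hunk above (it assumes the usual libswscale internal headers for SwsContext, SWS_FAST_BILINEAR and the ff_*scale_fast_mmxext prototypes):

#include "swscale_internal.h"   /* SwsContext, flags, function prototypes */

/* The runtime-generated MMXEXT scaler is only wired up for 8-bit input,
 * <= 14-bit output, when the caller asked for SWS_FAST_BILINEAR and the
 * context says MMXEXT is usable; otherwise the generic filter path runs. */
static void pick_hscale(SwsContext *c)
{
    if (c->srcBpc == 8 && c->dstBpc <= 14 &&
        (c->flags & SWS_FAST_BILINEAR) && c->canMMXEXTBeUsed) {
        c->hyscale_fast = ff_hyscale_fast_mmxext;
        c->hcscale_fast = ff_hcscale_fast_mmxext;
    } else {
        c->hyscale_fast = NULL;   /* fall back to the filter-based path */
        c->hcscale_fast = NULL;
    }
}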
diff --git a/libswscale/x86/w64xmmtest.c b/libswscale/x86/w64xmmtest.c
index dd9a2a4..88143d9 100644
--- a/libswscale/x86/w64xmmtest.c
+++ b/libswscale/x86/w64xmmtest.c
@@ -2,20 +2,20 @@
* check XMM registers for clobbers on Win64
* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index bacc87f..5e2f77c 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -7,27 +7,26 @@
* 1,4,8bpp support and context / deglobalize stuff
* by Michael Niedermayer (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
-#include <assert.h>
#include "config.h"
#include "libswscale/rgb2rgb.h"
@@ -51,34 +50,30 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
//MMX versions
-#if HAVE_MMX_INLINE
+#if HAVE_MMX_INLINE && HAVE_6REGS
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 0
#define RENAME(a) a ## _mmx
#include "yuv2rgb_template.c"
-#endif /* HAVE_MMX_INLINE */
+#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
// MMXEXT versions
-#if HAVE_MMXEXT_INLINE
+#if HAVE_MMXEXT_INLINE && HAVE_6REGS
#undef RENAME
#undef COMPILE_TEMPLATE_MMXEXT
#define COMPILE_TEMPLATE_MMXEXT 1
#define RENAME(a) a ## _mmxext
#include "yuv2rgb_template.c"
-#endif /* HAVE_MMXEXT_INLINE */
+#endif /* HAVE_MMXEXT_INLINE && HAVE_6REGS */
#endif /* HAVE_INLINE_ASM */
av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
{
-#if HAVE_MMX_INLINE
+#if HAVE_MMX_INLINE && HAVE_6REGS
int cpu_flags = av_get_cpu_flags();
- if (c->srcFormat != AV_PIX_FMT_YUV420P &&
- c->srcFormat != AV_PIX_FMT_YUVA420P)
- return NULL;
-
#if HAVE_MMXEXT_INLINE
if (INLINE_MMXEXT(cpu_flags)) {
switch (c->dstFormat) {
@@ -118,7 +113,7 @@ av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
return yuv420_rgb15_mmx;
}
}
-#endif /* HAVE_MMX_INLINE */
+#endif /* HAVE_MMX_INLINE && HAVE_6REGS */
return NULL;
}
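
[note] Both template includes and the init function are now additionally gated on HAVE_6REGS, since the inline asm names six general-purpose registers and x86-32 builds can lose ebx to PIC and ebp to the frame pointer. The early return for sources other than YUV420P/YUVA420P is dropped because the template (next file) now derives the chroma row shift from srcFormat itself. A condensed sketch of the resulting dispatch, with a hypothetical demo_ wrapper name:

#include "libswscale/swscale_internal.h"  /* SwsFunc, SwsContext */
#include "libavutil/x86/cpu.h"            /* INLINE_MMX / INLINE_MMXEXT */

SwsFunc demo_yuv2rgb_init_x86(SwsContext *c)
{
#if HAVE_MMX_INLINE && HAVE_6REGS
    int cpu_flags = av_get_cpu_flags();
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags) && c->dstFormat == AV_PIX_FMT_RGB24)
        return yuv420_rgb24_mmxext;       /* RENAME()d template instance */
#endif
    if (INLINE_MMX(cpu_flags) && c->dstFormat == AV_PIX_FMT_RGB565)
        return yuv420_rgb16_mmx;
#endif
    return NULL;                          /* no accelerated path */
}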
diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
index 0b97516..acb78f5 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -4,20 +4,20 @@
* Copyright (C) 2001-2007 Michael Niedermayer
* (c) 2010 Konstantin Shishkov
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -48,17 +48,14 @@
if (h_size * depth > FFABS(dstStride[0])) \
h_size -= 8; \
\
- if (c->srcFormat == AV_PIX_FMT_YUV422P) { \
- srcStride[1] *= 2; \
- srcStride[2] *= 2; \
- } \
+ vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \
\
__asm__ volatile ("pxor %mm4, %mm4\n\t"); \
for (y = 0; y < srcSliceH; y++) { \
uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \
const uint8_t *py = src[0] + y * srcStride[0]; \
- const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \
- const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \
+ const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
+ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
x86_reg index = -h_size / 2; \
#define YUV2RGB_INITIAL_LOAD \
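
[note] The hunk above stops doubling srcStride[1]/srcStride[2] for 4:2:2 input (which mutated the caller's stride array) and instead selects chroma rows with a vertical shift: 1 for 4:2:0, where each chroma row serves two luma rows, and 0 for 4:2:2, where there is one per luma row. A standalone sketch of the addressing:

#include <stdint.h>

/* Chroma row addressing via a vertical shift instead of mutating the
 * caller's srcStride[] (as the old 4:2:2 special case did). */
static const uint8_t *chroma_row(const uint8_t *base, int stride,
                                 int y, int is420)
{
    int vshift = is420 ? 1 : 0;   /* 4:2:0 halves chroma vertically */
    return base + (y >> vshift) * stride;
}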
@@ -142,10 +139,21 @@
"add $4, %0\n\t" \
"js 1b\n\t" \
+#if COMPILE_TEMPLATE_MMXEXT
+#undef RGB_PACK24_B_OPERANDS
+#define RGB_PACK24_B_OPERANDS NAMED_CONSTRAINTS_ARRAY_ADD(mask1101,mask0110,mask0100,mask0010,mask1001)
+#else
+#undef RGB_PACK24_B_OPERANDS
+#define RGB_PACK24_B_OPERANDS
+#endif
+
#define YUV2RGB_OPERANDS \
: "+r" (index), "+r" (image) \
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
"r" (py - 2*index) \
+ NAMED_CONSTRAINTS_ADD(mmx_00ffw,pb_03,pb_07,mmx_redmask,pb_e0) \
+ RGB_PACK24_B_OPERANDS \
+ : "memory" \
); \
} \
@@ -153,6 +161,8 @@
: "+r" (index), "+r" (image) \
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
"r" (py - 2*index), "r" (pa - 2*index) \
+ NAMED_CONSTRAINTS_ADD(mmx_00ffw) \
+ : "memory" \
); \
} \
@@ -193,7 +203,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(2)
@@ -221,7 +231,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(2)
@@ -311,7 +321,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(3)
@@ -329,7 +339,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(3)
@@ -373,7 +383,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(4)
@@ -394,7 +404,7 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(4)
@@ -416,7 +426,7 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(4)
@@ -437,7 +447,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(4)
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 480fbe3..8e92e6d 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -6,27 +6,26 @@
* 1,4,8bpp support and context / deglobalize stuff
* by Michael Niedermayer (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
-#include <assert.h>
#include "libavutil/cpu.h"
#include "libavutil/bswap.h"
@@ -34,6 +33,7 @@
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
+#include "libavutil/pixdesc.h"
const int32_t ff_yuv2rgb_coeffs[8][4] = {
{ 117504, 138453, 13954, 34903 }, /* no sequence_display_extension */
@@ -56,9 +56,9 @@ const int *sws_getCoefficients(int colorspace)
#define LOADCHROMA(i) \
U = pu[i]; \
V = pv[i]; \
- r = (void *)c->table_rV[V]; \
- g = (void *)(c->table_gU[U] + c->table_gV[V]); \
- b = (void *)c->table_bU[U];
+ r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \
+ g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \
+ b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM];
#define PUTRGB(dst, src, i) \
Y = src[2 * i]; \
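
[note] LOADCHROMA now biases every lookup by YUVRGB_TABLE_HEADROOM: the conversion tables gain that many padding entries on each side (see the table-fill changes below), so U/V values that stray slightly outside 0..255 index valid saturated entries instead of reading out of bounds. A simplified sketch, using a value table rather than the pointer tables the real code builds:

#include <stdint.h>

#define DEMO_HEADROOM 128   /* hypothetical stand-in for YUVRGB_TABLE_HEADROOM */

/* Padded lookup table indexed with a bias, so indices in
 * [-DEMO_HEADROOM, 255 + DEMO_HEADROOM] are all in bounds. */
static int16_t table_rV_demo[256 + 2 * DEMO_HEADROOM];

static int16_t red_for_V(int V)   /* V may stray outside 0..255 */
{
    return table_rV_demo[V + DEMO_HEADROOM];
}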
@@ -382,24 +382,65 @@ ENDYUV2RGBLINE(24, 1)
PUTBGR24(dst_2, py_2, 0);
ENDYUV2RGBFUNC()
-// This is exactly the same code as yuv2rgb_c_32 except for the types of
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0)
+YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
+ const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+ const uint8_t *e16 = ff_dither_2x2_4[y & 1];
+ const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1];
+
+#define PUTRGB16(dst, src, i, o) \
+ Y = src[2 * i]; \
+ dst[2 * i] = r[Y + d16[0 + o]] + \
+ g[Y + e16[0 + o]] + \
+ b[Y + f16[0 + o]]; \
+ Y = src[2 * i + 1]; \
+ dst[2 * i + 1] = r[Y + d16[1 + o]] + \
+ g[Y + e16[1 + o]] + \
+ b[Y + f16[1 + o]];
LOADCHROMA(0);
- PUTRGB(dst_1, py_1, 0);
- PUTRGB(dst_2, py_2, 0);
+ PUTRGB16(dst_1, py_1, 0, 0);
+ PUTRGB16(dst_2, py_2, 0, 0 + 8);
LOADCHROMA(1);
- PUTRGB(dst_2, py_2, 1);
- PUTRGB(dst_1, py_1, 1);
+ PUTRGB16(dst_2, py_2, 1, 2 + 8);
+ PUTRGB16(dst_1, py_1, 1, 2);
LOADCHROMA(2);
- PUTRGB(dst_1, py_1, 2);
- PUTRGB(dst_2, py_2, 2);
+ PUTRGB16(dst_1, py_1, 2, 4);
+ PUTRGB16(dst_2, py_2, 2, 4 + 8);
LOADCHROMA(3);
- PUTRGB(dst_2, py_2, 3);
- PUTRGB(dst_1, py_1, 3);
+ PUTRGB16(dst_2, py_2, 3, 6 + 8);
+ PUTRGB16(dst_1, py_1, 3, 6);
+CLOSEYUV2RGBFUNC(8)
+
+YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
+ const uint8_t *d16 = ff_dither_2x2_8[y & 1];
+ const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1];
+
+#define PUTRGB15(dst, src, i, o) \
+ Y = src[2 * i]; \
+ dst[2 * i] = r[Y + d16[0 + o]] + \
+ g[Y + d16[1 + o]] + \
+ b[Y + e16[0 + o]]; \
+ Y = src[2 * i + 1]; \
+ dst[2 * i + 1] = r[Y + d16[1 + o]] + \
+ g[Y + d16[0 + o]] + \
+ b[Y + e16[1 + o]];
+ LOADCHROMA(0);
+ PUTRGB15(dst_1, py_1, 0, 0);
+ PUTRGB15(dst_2, py_2, 0, 0 + 8);
+
+ LOADCHROMA(1);
+ PUTRGB15(dst_2, py_2, 1, 2 + 8);
+ PUTRGB15(dst_1, py_1, 1, 2);
+
+ LOADCHROMA(2);
+ PUTRGB15(dst_1, py_1, 2, 4);
+ PUTRGB15(dst_2, py_2, 2, 4 + 8);
+
+ LOADCHROMA(3);
+ PUTRGB15(dst_2, py_2, 3, 6 + 8);
+ PUTRGB15(dst_1, py_1, 3, 6);
CLOSEYUV2RGBFUNC(8)
// r, g, b, dst_1, dst_2
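
[note] The new yuv2rgb_c_16/15_ordered_dither functions replace the plain yuv2rgb_c_16 path: PUTRGB16/PUTRGB15 add small per-position offsets from 2x2 dither rows (d16/e16/f16, flipped vertically by y & 1) to Y before the table lookups, and RGB565 dithers green with the finer ff_dither_2x2_4 row because green keeps 6 bits while red and blue keep 5. A minimal sketch of 2x2 ordered dithering ahead of 5-bit truncation, with an illustrative matrix (not the ff_dither_2x2_* contents):

#include <stdint.h>

/* Bias each sample by a (x, y)-parity-dependent amount before truncation,
 * trading visible banding for high-frequency noise. */
static uint8_t quant5_dither(uint8_t v, int x, int y)
{
    static const uint8_t d[2][2] = { { 0, 4 }, { 6, 2 } };  /* hypothetical */
    int biased = v + d[y & 1][x & 1];
    if (biased > 255)
        biased = 255;                 /* saturate before dropping bits */
    return (uint8_t)(biased >> 3);    /* keep the top 5 bits */
}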
@@ -532,7 +573,7 @@ CLOSEYUV2RGBFUNC(8)
YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
const uint8_t *d128 = ff_dither_8x8_220[y & 7];
char out_1 = 0, out_2 = 0;
- g = c->table_gU[128] + c->table_gV[128];
+ g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
#define PUTRGB1(out, src, i, o) \
Y = src[2 * i]; \
@@ -570,7 +611,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
av_log(c, AV_LOG_WARNING,
"No accelerated colorspace conversion found from %s to %s.\n",
- sws_format_name(c->srcFormat), sws_format_name(c->dstFormat));
+ av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat));
switch (c->dstFormat) {
case AV_PIX_FMT_BGR48BE:
@@ -581,23 +622,21 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
return yuv2rgb_c_48;
case AV_PIX_FMT_ARGB:
case AV_PIX_FMT_ABGR:
- if (CONFIG_SWSCALE_ALPHA && c->srcFormat == AV_PIX_FMT_YUVA420P)
+ if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
return yuva2argb_c;
case AV_PIX_FMT_RGBA:
case AV_PIX_FMT_BGRA:
- if (CONFIG_SWSCALE_ALPHA && c->srcFormat == AV_PIX_FMT_YUVA420P)
- return yuva2rgba_c;
- else
- return yuv2rgb_c_32;
+ return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32;
case AV_PIX_FMT_RGB24:
return yuv2rgb_c_24_rgb;
case AV_PIX_FMT_BGR24:
return yuv2rgb_c_24_bgr;
case AV_PIX_FMT_RGB565:
case AV_PIX_FMT_BGR565:
+ return yuv2rgb_c_16_ordered_dither;
case AV_PIX_FMT_RGB555:
case AV_PIX_FMT_BGR555:
- return yuv2rgb_c_16;
+ return yuv2rgb_c_15_ordered_dither;
case AV_PIX_FMT_RGB444:
case AV_PIX_FMT_BGR444:
return yuv2rgb_c_12_ordered_dither;
@@ -612,36 +651,32 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
return yuv2rgb_c_4b_ordered_dither;
case AV_PIX_FMT_MONOBLACK:
return yuv2rgb_c_1_ordered_dither;
- default:
- assert(0);
}
return NULL;
}
-static void fill_table(uint8_t *table[256], const int elemsize,
- const int inc, void *y_tab)
+static void fill_table(uint8_t* table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize,
+ const int64_t inc, void *y_tab)
{
int i;
- int64_t cb = 0;
uint8_t *y_table = y_tab;
y_table -= elemsize * (inc >> 9);
- for (i = 0; i < 256; i++) {
+ for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) {
+ int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc;
table[i] = y_table + elemsize * (cb >> 16);
- cb += inc;
}
}
-static void fill_gv_table(int table[256], const int elemsize, const int inc)
+static void fill_gv_table(int table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int64_t inc)
{
int i;
- int64_t cb = 0;
int off = -(inc >> 9);
- for (i = 0; i < 256; i++) {
+ for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) {
+ int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc;
table[i] = elemsize * (off + (cb >> 16));
- cb += inc;
}
}
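
[note] fill_table() and fill_gv_table() now size their tables for the headroom and compute each entry from a clipped index, av_clip(i - YUVRGB_TABLE_HEADROOM, 0, 255) * inc, instead of accumulating cb += inc. That fills the padding entries with the saturated endpoint values, and widening inc to int64_t keeps the per-entry multiply from overflowing. A close paraphrase of the new fill pattern:

#include <stdint.h>

#define DEMO_HEADROOM 128   /* hypothetical stand-in for YUVRGB_TABLE_HEADROOM */

static int clip255(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }

/* Headroom entries saturate to the first or last in-range entry instead of
 * walking past the table ends. */
static void demo_fill(uint8_t *table[256 + 2 * DEMO_HEADROOM],
                      int elemsize, int64_t inc, uint8_t *y_table)
{
    y_table -= elemsize * (inc >> 9);
    for (int i = 0; i < 256 + 2 * DEMO_HEADROOM; i++) {
        int64_t cb = (int64_t)clip255(i - DEMO_HEADROOM) * inc;
        table[i] = y_table + elemsize * (cb >> 16);
    }
}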
@@ -684,7 +719,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
uint8_t *y_table;
uint16_t *y_table16;
uint32_t *y_table32;
- int i, base, rbase, gbase, bbase, abase, needAlpha;
+ int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha;
const int yoffs = fullRange ? 384 : 326;
int64_t crv = inv_table[0];
@@ -729,12 +764,12 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
c->yuv2rgb_u2b_coeff = (int16_t)roundToInt16(cbu << 13);
//scale coefficients by cy
- crv = ((crv << 16) + 0x8000) / cy;
- cbu = ((cbu << 16) + 0x8000) / cy;
- cgu = ((cgu << 16) + 0x8000) / cy;
- cgv = ((cgv << 16) + 0x8000) / cy;
+ crv = ((crv << 16) + 0x8000) / FFMAX(cy, 1);
+ cbu = ((cbu << 16) + 0x8000) / FFMAX(cy, 1);
+ cgu = ((cgu << 16) + 0x8000) / FFMAX(cy, 1);
+ cgv = ((cgv << 16) + 0x8000) / FFMAX(cy, 1);
- av_free(c->yuvTable);
+ av_freep(&c->yuvTable);
switch (bpp) {
case 1:
@@ -847,6 +882,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
fill_gv_table(c->table_gV, 1, cgv);
break;
case 32:
+ case 64:
base = (c->dstFormat == AV_PIX_FMT_RGB32_1 ||
c->dstFormat == AV_PIX_FMT_BGR32_1) ? 8 : 0;
rbase = base + (isRgb ? 16 : 0);
@@ -872,7 +908,6 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
fill_gv_table(c->table_gV, 4, cgv);
break;
default:
- c->yuvTable = NULL;
if(!isPlanar(c->dstFormat) || bpp <= 24)
av_log(c, AV_LOG_ERROR, "%ibpp not supported by yuv2rgb\n", bpp);
return -1;
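
[note] Two hardening changes close the file: the coefficient scaling divides by FFMAX(cy, 1) so a degenerate contrast of zero cannot divide by zero, and av_freep(&c->yuvTable) both frees and clears the pointer, which is why the explicit c->yuvTable = NULL in the default case can go away. A minimal sketch of the freep idiom (mirroring libavutil's av_freep() contract, with plain free() standing in):

#include <stdlib.h>

/* Free through a pointer-to-pointer and clear it, so later frees or error
 * paths see NULL instead of a stale address. */
static void demo_freep(void *arg)
{
    void **p = arg;
    free(*p);
    *p = NULL;
}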