diff options
Diffstat (limited to 'libswscale')
32 files changed, 1322 insertions, 911 deletions
diff --git a/libswscale/Makefile b/libswscale/Makefile index 36c2bef..e78ec42 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -1,3 +1,5 @@ +include $(SUBDIR)../config.mak + NAME = swscale FFLIBS = avutil @@ -20,6 +22,8 @@ MMX-OBJS-$(HAVE_YASM) += x86/input.o \ x86/output.o \ x86/scale.o +$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS) + OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o TESTPROGS = colorspace swscale diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S index b007f07..eab30aa 100644 --- a/libswscale/bfin/internal_bfin.S +++ b/libswscale/bfin/internal_bfin.S @@ -5,20 +5,20 @@ * Blackfin video color space converter operations * convert I420 YV12 to RGB in various formats * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c index f9eba1e..3cd4f28 100644 --- a/libswscale/bfin/swscale_bfin.c +++ b/libswscale/bfin/swscale_bfin.c @@ -3,20 +3,20 @@ * * Blackfin software video scaler operations * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c index 91a7aee..e7f657f 100644 --- a/libswscale/bfin/yuv2rgb_bfin.c +++ b/libswscale/bfin/yuv2rgb_bfin.c @@ -4,20 +4,20 @@ * Blackfin video color space converter operations * convert I420 YV12 to RGB in various formats * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -27,6 +27,7 @@ #include <inttypes.h> #include <assert.h> #include <unistd.h> +#include "libavutil/pixdesc.h" #include "config.h" #include "libswscale/rgb2rgb.h" @@ -198,7 +199,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c) } av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return f; } diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c index 135924c..89713a8 100644 --- a/libswscale/colorspace-test.c +++ b/libswscale/colorspace-test.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -33,7 +33,7 @@ #define FUNC(s, d, n) { s, d, #n, n } -int main(void) +int main(int argc, char **argv) { int i, funcNum; uint8_t *srcBuffer = av_malloc(SIZE); diff --git a/libswscale/input.c b/libswscale/input.c index e636eac..9838677 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -104,7 +104,7 @@ rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV, #undef input_pixel #define rgb48funcs(pattern, BE_LE, origin) \ -static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \ +static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\ int width, uint32_t *unused) \ { \ const uint16_t *src = (const uint16_t *) _src; \ @@ -113,7 +113,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, } \ \ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ - const uint8_t *_src1, const uint8_t *_src2, \ + const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ int width, uint32_t *unused) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ @@ -123,7 +123,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ } \ \ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ - const uint8_t *_src1, const uint8_t *_src2, \ + const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ int width, uint32_t *unused) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ @@ -142,14 +142,14 @@ rgb48funcs(bgr, BE, PIX_FMT_BGR48BE) (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2]))) static av_always_inline void -rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src, +rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src, int width, enum PixelFormat origin, int shr, int shg, int shb, int shp, int maskr, int maskg, int maskb, int rsh, int gsh, int bsh, int S) { const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh; - const unsigned rnd = 33u << (S - 1); + const unsigned rnd = (32<<((S)-1)) + (1<<(S-7)); int i; for (i = 0; i < width; i++) { @@ -158,12 +158,12 @@ rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src, int g = (px & maskg) >> shg; int r = (px & maskr) >> shr; - dst[i] = (ry * r + gy * g + by * b + rnd) >> S; + dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6); } } static av_always_inline void -rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, +rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat origin, int shr, int shg, int shb, int shp, @@ -172,7 +172,7 @@ rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, { const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, rv = RV << rsh, gv = GV << gsh, bv = BV << bsh; - const unsigned rnd = 257u << (S - 1); + const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7)); int i; for (i = 0; i < width; i++) { @@ -181,13 +181,13 @@ rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, int g = (px & maskg) >> shg; int r = (px & maskr) >> shr; - dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S; - dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S; + dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6); + dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6); } } static av_always_inline void -rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, +rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat origin, int shr, int shg, int shb, int shp, @@ -197,7 +197,7 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, maskgx = ~(maskr | maskb); - const unsigned rnd = 257u << S; + const unsigned rnd = (256U<<(S)) + (1<<(S-6)); int i; maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1; @@ -216,8 +216,8 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, } r = (rb & maskr) >> shr; - dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1); - dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1); + dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1); + dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1); } } @@ -225,26 +225,29 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ maskg, maskb, rsh, gsh, bsh, S) \ -static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \ +static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \ int width, uint32_t *unused) \ { \ - rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \ + rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \ + shr, shg, shb, shp, \ maskr, maskg, maskb, rsh, gsh, bsh, S); \ } \ \ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src, const uint8_t *dummy, \ + const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \ int width, uint32_t *unused) \ { \ - rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ + rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ + shr, shg, shb, shp, \ maskr, maskg, maskb, rsh, gsh, bsh, S); \ } \ \ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src, const uint8_t *dummy, \ + const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \ int width, uint32_t *unused) \ { \ - rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ + rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ + shr, shg, shb, shp, \ maskr, maskg, maskb, rsh, gsh, bsh, S); \ } @@ -265,34 +268,59 @@ rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x0 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7) rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT+4) -static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused) +static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV, + const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc, + int width, enum PixelFormat origin) { int i; for (i=0; i<width; i++) { - dst[i]= src[4*i]; + unsigned int g = gsrc[2*i] + gsrc[2*i+1]; + unsigned int b = bsrc[2*i] + bsrc[2*i+1]; + unsigned int r = rsrc[2*i] + rsrc[2*i+1]; + + dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + } +} + +static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) { + dst[i]= src[4*i]<<6; + } +} + +static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) { + dst[i]= src[4*i+3]<<6; } } -static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused) +static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) { int i; for (i=0; i<width; i++) { - dst[i]= src[4*i+3]; + int d= src[i]; + + dst[i]= (pal[d] >> 24)<<6; } } -static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal) +static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, long width, uint32_t *pal) { int i; for (i=0; i<width; i++) { int d= src[i]; - dst[i]= pal[d] & 0xFF; + dst[i]= (pal[d] & 0xFF)<<6; } } -static void palToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, +static void palToUV_c(uint16_t *dstU, int16_t *dstV, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *pal) { int i; @@ -300,34 +328,42 @@ static void palToUV_c(uint8_t *dstU, uint8_t *dstV, for (i=0; i<width; i++) { int p= pal[src1[i]]; - dstU[i]= p>>8; - dstV[i]= p>>16; + dstU[i]= (uint8_t)(p>> 8)<<6; + dstV[i]= (uint8_t)(p>>16)<<6; } } -static void monowhite2Y_c(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) +static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i, j; for (i=0; i<width/8; i++) { int d= ~src[i]; for(j=0; j<8; j++) - dst[8*i+j]= ((d>>(7-j))&1)*255; + dst[8*i+j]= ((d>>(7-j))&1)*16383; + } + if(width&7){ + int d= ~src[i]; + for(j=0; j<(width&7); j++) + dst[8*i+j]= ((d>>(7-j))&1)*16383; } } -static void monoblack2Y_c(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) +static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i, j; for (i=0; i<width/8; i++) { int d= src[i]; for(j=0; j<8; j++) - dst[8*i+j]= ((d>>(7-j))&1)*255; + dst[8*i+j]= ((d>>(7-j))&1)*16383; + } + if(width&7){ + int d= src[i]; + for(j=0; j<(width&7); j++) + dst[8*i+j]= ((d>>(7-j))&1)*16383; } } -static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, +static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -335,7 +371,7 @@ static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, dst[i]= src[2*i]; } -static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -346,7 +382,7 @@ static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, assert(src1 == src2); } -static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused) +static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; const uint16_t *src = (const uint16_t *) _src; @@ -356,7 +392,7 @@ static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t * } } -static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1, +static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, int width, uint32_t *unused) { int i; @@ -371,7 +407,7 @@ static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1, /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ -static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, +static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -379,7 +415,7 @@ static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, dst[i]= src[2*i+1]; } -static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -401,14 +437,14 @@ static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, } static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { nvXXtoUV_c(dstU, dstV, src1, width); } static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { nvXXtoUV_c(dstV, dstU, src1, width); @@ -416,7 +452,7 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) -static void bgr24ToY_c(uint8_t *dst, const uint8_t *src, +static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -425,11 +461,11 @@ static void bgr24ToY_c(uint8_t *dst, const uint8_t *src, int g= src[i*3+1]; int r= src[i*3+2]; - dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } -static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -438,13 +474,13 @@ static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[3*i + 1]; int r= src1[3*i + 2]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; + dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } assert(src1 == src2); } -static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -453,13 +489,13 @@ static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[6*i + 1] + src1[6*i + 4]; int r= src1[6*i + 2] + src1[6*i + 5]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); + dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } assert(src1 == src2); } -static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width, +static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -468,11 +504,11 @@ static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width, int g= src[i*3+1]; int b= src[i*3+2]; - dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } -static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -482,12 +518,12 @@ static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[3*i + 1]; int b= src1[3*i + 2]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; + dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } } -static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -497,12 +533,12 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[6*i + 1] + src1[6*i + 4]; int b= src1[6*i + 2] + src1[6*i + 5]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); + dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } } -static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width) +static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width) { int i; for (i = 0; i < width; i++) { @@ -510,7 +546,7 @@ static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width) int b = src[1][i]; int r = src[2][i]; - dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); + dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } @@ -542,7 +578,7 @@ static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width } } -static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width) +static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width) { int i; for (i = 0; i < width; i++) { @@ -550,8 +586,8 @@ static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4] int b = src[1][i]; int r = src[2][i]; - dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); - dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); + dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); + dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } @@ -653,6 +689,7 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break; case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break; case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break; + case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break; case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break; case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break; } @@ -756,6 +793,7 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_ABGR: case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break; case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break; + case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break; } } } diff --git a/libswscale/options.c b/libswscale/options.c index 9ba6e5e..65095d5 100644 --- a/libswscale/options.c +++ b/libswscale/options.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/output.c b/libswscale/output.c index 8263da1..cae2c31 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -25,6 +25,7 @@ #include <string.h> #include "libavutil/avutil.h" +#include "libavutil/avassert.h" #include "libavutil/bswap.h" #include "libavutil/cpu.h" #include "libavutil/intreadwrite.h" @@ -135,7 +136,8 @@ yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW, int big_endian, int output_bits) { int i; - int shift = 19 - output_bits; + int shift = 3; + av_assert0(output_bits == 16); for (i = 0; i < dstW; i++) { int val = src[i] + (1 << (shift - 1)); @@ -149,10 +151,11 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize, int big_endian, int output_bits) { int i; - int shift = 15 + 16 - output_bits; + int shift = 15; + av_assert0(output_bits == 16); for (i = 0; i < dstW; i++) { - int val = 1 << (30-output_bits); + int val = 1 << (shift - 1); int j; /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline @@ -199,7 +202,7 @@ yuv2planeX_10_c_template(const int16_t *filter, int filterSize, int shift = 11 + 16 - output_bits; for (i = 0; i < dstW; i++) { - int val = 1 << (26-output_bits); + int val = 1 << (shift - 1); int j; for (j = 0; j < filterSize; j++) @@ -388,14 +391,14 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, for (i = 0; i < dstW - 7; i += 8) { int acc = 0; - accumulate_bit(acc, (buf0[i + 0] >> 7) + d128[0]); - accumulate_bit(acc, (buf0[i + 1] >> 7) + d128[1]); - accumulate_bit(acc, (buf0[i + 2] >> 7) + d128[2]); - accumulate_bit(acc, (buf0[i + 3] >> 7) + d128[3]); - accumulate_bit(acc, (buf0[i + 4] >> 7) + d128[4]); - accumulate_bit(acc, (buf0[i + 5] >> 7) + d128[5]); - accumulate_bit(acc, (buf0[i + 6] >> 7) + d128[6]); - accumulate_bit(acc, (buf0[i + 7] >> 7) + d128[7]); + accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); + accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); + accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); + accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]); + accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]); + accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]); + accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]); + accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]); output_pixel(*dest++, acc); } @@ -511,6 +514,13 @@ yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2], int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; + if ((Y1 | Y2 | U | V) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + U = av_clip_uint8(U); + V = av_clip_uint8(V); + } + output_pixels(i * 4, Y1, U, Y2, V); } } @@ -526,20 +536,34 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { for (i = 0; i < (dstW >> 1); i++) { - int Y1 = buf0[i * 2] >> 7; - int Y2 = buf0[i * 2 + 1] >> 7; - int U = ubuf0[i] >> 7; - int V = vbuf0[i] >> 7; + int Y1 = (buf0[i * 2 ]+64) >> 7; + int Y2 = (buf0[i * 2 + 1]+64) >> 7; + int U = (ubuf0[i] +64) >> 7; + int V = (vbuf0[i] +64) >> 7; + + if ((Y1 | Y2 | U | V) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + U = av_clip_uint8(U); + V = av_clip_uint8(V); + } output_pixels(i * 4, Y1, U, Y2, V); } } else { const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; for (i = 0; i < (dstW >> 1); i++) { - int Y1 = buf0[i * 2] >> 7; - int Y2 = buf0[i * 2 + 1] >> 7; - int U = (ubuf0[i] + ubuf1[i]) >> 8; - int V = (vbuf0[i] + vbuf1[i]) >> 8; + int Y1 = (buf0[i * 2 ] + 64) >> 7; + int Y2 = (buf0[i * 2 + 1] + 64) >> 7; + int U = (ubuf0[i] + ubuf1[i]+128) >> 8; + int V = (vbuf0[i] + vbuf1[i]+128) >> 8; + + if ((Y1 | Y2 | U | V) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + U = av_clip_uint8(U); + V = av_clip_uint8(V); + } output_pixels(i * 4, Y1, U, Y2, V); } @@ -792,7 +816,7 @@ YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE) * correct RGB values into the destination buffer. */ static av_always_inline void -yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2, +yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, unsigned A1, unsigned A2, const void *_r, const void *_g, const void *_b, int y, enum PixelFormat target, int hasAlpha) @@ -828,6 +852,7 @@ yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2, #define r_b ((target == PIX_FMT_RGB24) ? r : b) #define b_r ((target == PIX_FMT_RGB24) ? b : r) + dest[i * 6 + 0] = r_b[Y1]; dest[i * 6 + 1] = g[Y1]; dest[i * 6 + 2] = b_r[Y1]; @@ -933,12 +958,6 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter, Y2 >>= 19; U >>= 19; V >>= 19; - if ((Y1 | Y2 | U | V) & 0x100) { - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - } if (hasAlpha) { A1 = 1 << 18; A2 = 1 << 18; @@ -954,10 +973,9 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter, } } - /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/ - r = c->table_rV[V]; - g = (c->table_gU[U] + c->table_gV[V]); - b = c->table_bU[U]; + r = c->table_rV[V + YUVRGB_TABLE_HEADROOM]; + g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]); + b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, r, g, b, y, target, hasAlpha); @@ -986,9 +1004,9 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2], int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; int A1, A2; - const void *r = c->table_rV[V], - *g = (c->table_gU[U] + c->table_gV[V]), - *b = c->table_bU[U]; + const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], + *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), + *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; if (hasAlpha) { A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19; @@ -1012,18 +1030,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { for (i = 0; i < (dstW >> 1); i++) { - int Y1 = buf0[i * 2] >> 7; - int Y2 = buf0[i * 2 + 1] >> 7; - int U = ubuf0[i] >> 7; - int V = vbuf0[i] >> 7; + int Y1 = (buf0[i * 2 ] + 64) >> 7; + int Y2 = (buf0[i * 2 + 1] + 64) >> 7; + int U = (ubuf0[i] + 64) >> 7; + int V = (vbuf0[i] + 64) >> 7; int A1, A2; - const void *r = c->table_rV[V], - *g = (c->table_gU[U] + c->table_gV[V]), - *b = c->table_bU[U]; + const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], + *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), + *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; if (hasAlpha) { - A1 = abuf0[i * 2 ] >> 7; - A2 = abuf0[i * 2 + 1] >> 7; + A1 = (abuf0[i * 2 ] + 64) >> 7; + A2 = (abuf0[i * 2 + 1] + 64) >> 7; } yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, @@ -1032,18 +1050,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0, } else { const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; for (i = 0; i < (dstW >> 1); i++) { - int Y1 = buf0[i * 2] >> 7; - int Y2 = buf0[i * 2 + 1] >> 7; - int U = (ubuf0[i] + ubuf1[i]) >> 8; - int V = (vbuf0[i] + vbuf1[i]) >> 8; + int Y1 = (buf0[i * 2 ] + 64) >> 7; + int Y2 = (buf0[i * 2 + 1] + 64) >> 7; + int U = (ubuf0[i] + ubuf1[i] + 128) >> 8; + int V = (vbuf0[i] + vbuf1[i] + 128) >> 8; int A1, A2; - const void *r = c->table_rV[V], - *g = (c->table_gU[U] + c->table_gV[V]), - *b = c->table_bU[U]; + const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], + *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), + *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; if (hasAlpha) { - A1 = abuf0[i * 2 ] >> 7; - A2 = abuf0[i * 2 + 1] >> 7; + A1 = (abuf0[i * 2 ] + 64) >> 7; + A2 = (abuf0[i * 2 + 1] + 64) >> 7; } yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0, @@ -1117,9 +1135,9 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, for (i = 0; i < dstW; i++) { int j; - int Y = 0; - int U = -128 << 19; - int V = -128 << 19; + int Y = 1<<9; + int U = (1<<9)-(128 << 19); + int V = (1<<9)-(128 << 19); int R, G, B, A; for (j = 0; j < lumFilterSize; j++) { @@ -1133,7 +1151,7 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, U >>= 10; V >>= 10; if (hasAlpha) { - A = 1 << 21; + A = 1 << 18; for (j = 0; j < lumFilterSize; j++) { A += alpSrc[j][i] * lumFilter[j]; } @@ -1176,7 +1194,6 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, dest[1] = B >> 22; dest[2] = G >> 22; dest[3] = R >> 22; - dest += 4; break; case PIX_FMT_BGR24: dest[0] = B >> 22; @@ -1307,7 +1324,10 @@ void ff_sws_init_output_funcs(SwsContext *c, *yuv2packedX = yuv2bgr24_full_X_c; break; } + if(!*yuv2packedX) + goto YUV_PACKED; } else { + YUV_PACKED: switch (dstFormat) { case PIX_FMT_RGB48LE: *yuv2packed1 = yuv2rgb48le_1_c; diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 13c63e8..f8f109e 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c index 73c02e9..8e84c26 100644 --- a/libswscale/ppc/yuv2rgb_altivec.c +++ b/libswscale/ppc/yuv2rgb_altivec.c @@ -3,20 +3,20 @@ * * copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -95,6 +95,7 @@ adjustment. #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" #include "libavutil/cpu.h" +#include "libavutil/pixdesc.h" #include "yuv2rgb_altivec.h" #undef PROFILE_THE_BEAST @@ -298,7 +299,7 @@ static int altivec_##name (SwsContext *c, \ vector signed short R1,G1,B1; \ vector unsigned char R,G,B; \ \ - vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ + const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ vector unsigned char align_perm; \ \ vector signed short \ @@ -335,10 +336,10 @@ static int altivec_##name (SwsContext *c, \ \ for (j=0;j<w/16;j++) { \ \ - y1ivP = (vector unsigned char *)y1i; \ - y2ivP = (vector unsigned char *)y2i; \ - uivP = (vector unsigned char *)ui; \ - vivP = (vector unsigned char *)vi; \ + y1ivP = (const vector unsigned char *)y1i; \ + y2ivP = (const vector unsigned char *)y2i; \ + uivP = (const vector unsigned char *)ui; \ + vivP = (const vector unsigned char *)vi; \ \ align_perm = vec_lvsl (0, y1i); \ y0 = (vector unsigned char) \ @@ -720,7 +721,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, static int printed_error_message; if (!printed_error_message) { av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); printed_error_message=1; } return; @@ -795,7 +796,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, default: /* Unreachable, I think. */ av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return; } diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h index 2c5e7ed..aa52a47 100644 --- a/libswscale/ppc/yuv2rgb_altivec.h +++ b/libswscale/ppc/yuv2rgb_altivec.h @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c index 4cd02ff..82c265a 100644 --- a/libswscale/ppc/yuv2yuv_altivec.c +++ b/libswscale/ppc/yuv2yuv_altivec.c @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c index 5ae0703..3ef0925 100644 --- a/libswscale/rgb2rgb.c +++ b/libswscale/rgb2rgb.c @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include <inttypes.h> @@ -171,13 +171,13 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); #else - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); *d++ = 255; #endif } @@ -211,9 +211,9 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size) while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); } } @@ -250,13 +250,13 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); #else - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); *d++ = 255; #endif } @@ -271,9 +271,9 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size) while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); } } @@ -314,21 +314,6 @@ void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size) } } -void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size) -{ - int i; - int num_pixels = src_size; - for (i=0; i<num_pixels; i++) { - unsigned b,g,r; - register uint8_t rgb; - rgb = src[i]; - r = (rgb&0x07); - g = (rgb&0x38)>>3; - b = (rgb&0xC0)>>6; - dst[i] = ((b<<1)&0x07) | ((g&0x07)<<3) | ((r&0x03)<<6); - } -} - #define DEFINE_SHUFFLE_BYTES(a, b, c, d) \ void shuffle_bytes_##a##b##c##d(const uint8_t *src, uint8_t *dst, int src_size) \ { \ diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h index 42f468f..c447986 100644 --- a/libswscale/rgb2rgb.h +++ b/libswscale/rgb2rgb.h @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -64,7 +64,6 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size); void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size); void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size); void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size); -void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size); diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index c02015e..6363bc1 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -7,20 +7,20 @@ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -227,27 +227,6 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size) } } -/* - I use less accurate approximation here by simply left-shifting the input - value and filling the low order bits with zeroes. This method improves PNG - compression but this scheme cannot reproduce white exactly, since it does - not generate an all-ones maximum value; the net effect is to darken the - image slightly. - - The better method should be "left bit replication": - - 4 3 2 1 0 - --------- - 1 1 0 1 1 - - 7 6 5 4 3 2 1 0 - ---------------- - 1 1 0 1 1 1 1 0 - |=======| |===| - | leftmost bits repeated to fill open bits - | - original bits -*/ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; @@ -257,9 +236,9 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); } } @@ -272,9 +251,9 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); } } @@ -289,13 +268,13 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); #else - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); *d++ = 255; #endif } @@ -312,13 +291,13 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); #else - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); *d++ = 255; #endif } @@ -658,6 +637,9 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ydst += lumStride; src += srcStride; + if(y+1 == height) + break; + for (i=0; i<chromWidth; i++) { unsigned int b = src[6*i+0]; unsigned int g = src[6*i+1]; diff --git a/libswscale/sparc/yuv2rgb_vis.c b/libswscale/sparc/yuv2rgb_vis.c index bcd2081..62f5026 100644 --- a/libswscale/sparc/yuv2rgb_vis.c +++ b/libswscale/sparc/yuv2rgb_vis.c @@ -2,20 +2,20 @@ * VIS optimized software YUV to RGB converter * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c index 3497dff..ef6c55c 100644 --- a/libswscale/swscale-test.c +++ b/libswscale/swscale-test.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2003-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -103,6 +103,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, av_image_fill_linesizes(srcStride, srcFormat, srcW); for (p = 0; p < 4; p++) { + srcStride[p] = FFALIGN(srcStride[p], 16); if (srcStride[p]) src[p] = av_mallocz(srcStride[p] * srcH + 16); if (srcStride[p] && !src[p]) { @@ -136,6 +137,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, * allocated with av_malloc). */ /* An extra 16 bytes is being allocated because some scalers may write * out of bounds. */ + dstStride[i] = FFALIGN(dstStride[i], 16); if (dstStride[i]) dst[i] = av_mallocz(dstStride[i] * dstH + 16); if (dstStride[i] && !dst[i]) { @@ -175,6 +177,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, ssdA = r->ssdA; } else { for (i = 0; i < 4; i++) { + refStride[i] = FFALIGN(refStride[i], 16); if (refStride[i]) out[i] = av_mallocz(refStride[i] * h); if (refStride[i] && !out[i]) { diff --git a/libswscale/swscale.c b/libswscale/swscale.c index b231302..0578e2d 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -27,6 +27,7 @@ #include "swscale.h" #include "swscale_internal.h" #include "rgb2rgb.h" +#include "libavutil/avassert.h" #include "libavutil/intreadwrite.h" #include "libavutil/cpu.h" #include "libavutil/avutil.h" @@ -47,6 +48,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = { DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = { 64, 64, 64, 64, 64, 64, 64, 64 }; + static av_always_inline void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val) @@ -69,6 +71,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; int sh = bits - 4; + if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15) + sh= 9; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -90,6 +95,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t const uint16_t *src = (const uint16_t *) _src; int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + if(sh<15) + sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -204,7 +212,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width) int i; int32_t *dst = (int32_t *) _dst; for (i = 0; i < width; i++) - dst[i] = (dst[i]*14071 + (33561947<<4))>>14; + dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12; } static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, @@ -218,6 +226,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; xpos+=xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) + dst[i] = src[srcW-1]*128; } // *** horizontal scale Y line to temp buffer @@ -228,12 +238,12 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, uint8_t *formatConvBuffer, uint32_t *pal, int isAlpha) { - void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; + void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; const uint8_t *src = src_in[isAlpha ? 3 : 0]; if (toYV12) { - toYV12(formatConvBuffer, src, srcW, pal); + toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); src= formatConvBuffer; } else if (c->readLumPlanar && !isAlpha) { c->readLumPlanar(formatConvBuffer, src_in, srcW); @@ -263,6 +273,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); xpos+=xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { + dst1[i] = src1[srcW-1]*128; + dst2[i] = src2[srcW-1]*128; + } } static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, @@ -273,12 +287,12 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 { const uint8_t *src1 = src_in[1], *src2 = src_in[2]; if (c->chrToYV12) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); - c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); + c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal); src1= formatConvBuffer; src2= buf2; } else if (c->readChrPlanar) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW); src1= formatConvBuffer; src2= buf2; @@ -316,8 +330,6 @@ static int swScale(SwsContext *c, const uint8_t* src[], int16_t *vChrFilterPos= c->vChrFilterPos; int16_t *hLumFilterPos= c->hLumFilterPos; int16_t *hChrFilterPos= c->hChrFilterPos; - int16_t *vLumFilter= c->vLumFilter; - int16_t *vChrFilter= c->vChrFilter; int16_t *hLumFilter= c->hLumFilter; int16_t *hChrFilter= c->hChrFilter; int32_t *lumMmxFilter= c->lumMmxFilter; @@ -337,13 +349,14 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); int lastDstY; uint32_t *pal=c->pal_yuv; + int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat); + yuv2planar1_fn yuv2plane1 = c->yuv2plane1; yuv2planarX_fn yuv2planeX = c->yuv2planeX; yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX; yuv2packed1_fn yuv2packed1 = c->yuv2packed1; yuv2packed2_fn yuv2packed2 = c->yuv2packed2; yuv2packedX_fn yuv2packedX = c->yuv2packedX; - int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); /* vars which will change and which we need to store back in the context */ int dstY= c->dstY; @@ -373,7 +386,7 @@ static int swScale(SwsContext *c, const uint8_t* src[], DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); - if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) { + if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) { static int warnedAlready=0; //FIXME move this into the context perhaps if (flags & SWS_PRINT_INFO && !warnedAlready) { av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" @@ -382,6 +395,18 @@ static int swScale(SwsContext *c, const uint8_t* src[], } } + if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16 + || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16 + || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16 + ) { + static int warnedAlready=0; + int cpu_flags = av_get_cpu_flags(); + if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){ + av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n"); + warnedAlready=1; + } + } + /* Note the user might start scaling the picture in the middle so this will not get executed. This is not really intended but works currently, so people might do it. */ @@ -406,15 +431,14 @@ static int swScale(SwsContext *c, const uint8_t* src[], dst[2] + dstStride[2] * chrDstY, (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL, }; + int use_mmx_vfilter= c->use_mmx_vfilter; const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input - - // Last line needed as input - int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1; - int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1; - int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1; + int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input + int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input + int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input int enough_lines; //handle holes (FAST_BILINEAR & weird filters) @@ -503,85 +527,66 @@ static int swScale(SwsContext *c, const uint8_t* src[], // hmm looks like we can't use MMX here without overwriting this array's tail ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX, &yuv2packed1, &yuv2packed2, &yuv2packedX); + use_mmx_vfilter= 0; } { - const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; - - if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { - const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize; - int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); - for (i = 0; i < neg; i++) - tmpY[i] = lumSrcPtr[neg]; - for ( ; i < end; i++) - tmpY[i] = lumSrcPtr[i]; - for ( ; i < vLumFilterSize; i++) - tmpY[i] = tmpY[i-1]; - lumSrcPtr = tmpY; - - if (alpSrcPtr) { - const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize; - for (i = 0; i < neg; i++) - tmpA[i] = alpSrcPtr[neg]; - for ( ; i < end; i++) - tmpA[i] = alpSrcPtr[i]; - for ( ; i < vLumFilterSize; i++) - tmpA[i] = tmpA[i - 1]; - alpSrcPtr = tmpA; - } - } - if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) { - const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize, - **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize; - int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); - for (i = 0; i < neg; i++) { - tmpU[i] = chrUSrcPtr[neg]; - tmpV[i] = chrVSrcPtr[neg]; - } - for ( ; i < end; i++) { - tmpU[i] = chrUSrcPtr[i]; - tmpV[i] = chrVSrcPtr[i]; - } - for ( ; i < vChrFilterSize; i++) { - tmpU[i] = tmpU[i - 1]; - tmpV[i] = tmpV[i - 1]; - } - chrUSrcPtr = tmpU; - chrVSrcPtr = tmpV; - } + const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + int16_t *vLumFilter= c->vLumFilter; + int16_t *vChrFilter= c->vChrFilter; if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { //YV12 like const int chrSkipMask= (1<<c->chrDstVSubSample)-1; + vLumFilter += dstY * vLumFilterSize; + vChrFilter += chrDstY * vChrFilterSize; + +// av_assert0(use_mmx_vfilter != ( +// yuv2planeX == yuv2planeX_10BE_c +// || yuv2planeX == yuv2planeX_10LE_c +// || yuv2planeX == yuv2planeX_9BE_c +// || yuv2planeX == yuv2planeX_9LE_c +// || yuv2planeX == yuv2planeX_16BE_c +// || yuv2planeX == yuv2planeX_16LE_c +// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86); + + if(use_mmx_vfilter){ + vLumFilter= c->lumMmxFilter; + vChrFilter= c->chrMmxFilter; + } + if (vLumFilterSize == 1) { yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, + yuv2planeX(vLumFilter, vLumFilterSize, lumSrcPtr, dest[0], dstW, c->lumDither8, 0); } if (!((dstY&chrSkipMask) || isGray(dstFormat))) { if (yuv2nv12cX) { - yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); + yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); } else if (vChrFilterSize == 1) { yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0); yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3); } else { - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, + yuv2planeX(vChrFilter, vChrFilterSize, chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0); - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, - chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3); + yuv2planeX(vChrFilter, vChrFilterSize, + chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3); } } if (CONFIG_SWSCALE_ALPHA && alpPixBuf){ + if(use_mmx_vfilter){ + vLumFilter= c->alpMmxFilter; + } if (vLumFilterSize == 1) { yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, + yuv2planeX(vLumFilter, vLumFilterSize, alpSrcPtr, dest[3], dstW, c->lumDither8, 0); } } @@ -643,6 +648,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) ff_sws_init_input_funcs(c); + if (c->srcBpc == 8) { if (c->dstBpc <= 10) { c->hyScale = c->hcScale = hScale8To15_c; diff --git a/libswscale/swscale.h b/libswscale/swscale.h index b5a6a57..fa7100c 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -33,7 +33,7 @@ #define LIBSWSCALE_VERSION_MAJOR 2 #define LIBSWSCALE_VERSION_MINOR 1 -#define LIBSWSCALE_VERSION_MICRO 0 +#define LIBSWSCALE_VERSION_MICRO 100 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ @@ -55,6 +55,9 @@ #ifndef FF_API_SWS_CPU_CAPS #define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 3) #endif +#ifndef FF_API_SWS_FORMAT_NAME +#define FF_API_SWS_FORMAT_NAME (LIBSWSCALE_VERSION_MAJOR < 3) +#endif /** * Return the LIBSWSCALE_VERSION_INT constant. diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 8da1201..18ec4d9 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -34,10 +34,14 @@ #define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long +#define YUVRGB_TABLE_HEADROOM 128 + #define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients #define MAX_FILTER_SIZE 256 +#define DITHER1XBPP + #if HAVE_BIGENDIAN #define ALT32_CORR (-1) #else @@ -315,10 +319,10 @@ typedef struct SwsContext { int dstY; ///< Last destination vertical line output from last slice. int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc... void *yuvTable; // pointer to the yuv->rgb table start so it can be freed() - uint8_t *table_rV[256]; - uint8_t *table_gU[256]; - int table_gV[256]; - uint8_t *table_bU[256]; + uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM]; + uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM]; + int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM]; + uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM]; //Colorspace stuff int contrast, brightness, saturation; // for sws_getColorspaceDetails @@ -326,6 +330,8 @@ typedef struct SwsContext { int dstColorspaceTable[4]; int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image). int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image). + int src0Alpha; + int dst0Alpha; int yuv2rgb_y_offset; int yuv2rgb_y_coeff; int yuv2rgb_v2r_coeff; @@ -380,8 +386,8 @@ typedef struct SwsContext { // alignment of these values is not necessary, but merely here // to maintain the same offset across x8632 and x86-64. Once we // use proper offset macros in the asm, they can be removed. - DECLARE_ALIGNED(8, ptrdiff_t, uv_off_px); ///< offset (in pixels) between u and v planes - DECLARE_ALIGNED(8, ptrdiff_t, uv_off_byte); ///< offset (in bytes) between u and v planes + DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes + DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes DECLARE_ALIGNED(8, uint16_t, dither16)[8]; DECLARE_ALIGNED(8, uint32_t, dither32)[8]; @@ -415,6 +421,8 @@ typedef struct SwsContext { #if HAVE_VIS DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10]; #endif + int32_t alpMmxFilter[4 * MAX_FILTER_SIZE]; + int use_mmx_vfilter; /* function pointers for swScale() */ yuv2planar1_fn yuv2plane1; @@ -425,14 +433,14 @@ typedef struct SwsContext { yuv2packedX_fn yuv2packedX; /// Unscaled conversion of luma plane to YV12 for horizontal scaler. - void (*lumToYV12)(uint8_t *dst, const uint8_t *src, + void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal); /// Unscaled conversion of alpha plane to YV12 for horizontal scaler. - void (*alpToYV12)(uint8_t *dst, const uint8_t *src, + void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal); /// Unscaled conversion of chroma planes to YV12 for horizontal scaler. void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal); /** @@ -537,7 +545,13 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c); SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c); void ff_bfin_get_unscaled_swscale(SwsContext *c); +#if FF_API_SWS_FORMAT_NAME +/** + * @deprecated Use av_get_pix_fmt_name() instead. + */ +attribute_deprecated const char *sws_format_name(enum PixelFormat format); +#endif #define is16BPS(x) \ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 15) @@ -546,6 +560,8 @@ const char *sws_format_name(enum PixelFormat format); (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 8 || \ av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 9) +#define isNBPS(x) is9_OR_10BPS(x) + #define isBE(x) \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_BE) @@ -559,7 +575,6 @@ const char *sws_format_name(enum PixelFormat format); #define isRGB(x) \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_RGB) - #if 0 // FIXME #define isGray(x) \ (!(av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) && \ @@ -572,55 +587,95 @@ const char *sws_format_name(enum PixelFormat format); (x) == PIX_FMT_GRAY16LE) #endif -#define isRGBinInt(x) \ - ((x) == PIX_FMT_RGB48BE || \ - (x) == PIX_FMT_RGB48LE || \ - (x) == PIX_FMT_RGB32 || \ - (x) == PIX_FMT_RGB32_1 || \ - (x) == PIX_FMT_RGB24 || \ - (x) == PIX_FMT_RGB565BE || \ - (x) == PIX_FMT_RGB565LE || \ - (x) == PIX_FMT_RGB555BE || \ - (x) == PIX_FMT_RGB555LE || \ - (x) == PIX_FMT_RGB444BE || \ - (x) == PIX_FMT_RGB444LE || \ - (x) == PIX_FMT_RGB8 || \ - (x) == PIX_FMT_RGB4 || \ - (x) == PIX_FMT_RGB4_BYTE || \ - (x) == PIX_FMT_MONOBLACK || \ - (x) == PIX_FMT_MONOWHITE) - -#define isBGRinInt(x) \ - ((x) == PIX_FMT_BGR48BE || \ - (x) == PIX_FMT_BGR48LE || \ - (x) == PIX_FMT_BGR32 || \ - (x) == PIX_FMT_BGR32_1 || \ - (x) == PIX_FMT_BGR24 || \ - (x) == PIX_FMT_BGR565BE || \ - (x) == PIX_FMT_BGR565LE || \ - (x) == PIX_FMT_BGR555BE || \ - (x) == PIX_FMT_BGR555LE || \ - (x) == PIX_FMT_BGR444BE || \ - (x) == PIX_FMT_BGR444LE || \ - (x) == PIX_FMT_BGR8 || \ - (x) == PIX_FMT_BGR4 || \ - (x) == PIX_FMT_BGR4_BYTE || \ - (x) == PIX_FMT_MONOBLACK || \ - (x) == PIX_FMT_MONOWHITE) - -#define isAnyRGB(x) \ - (isRGBinInt(x) || \ - isBGRinInt(x)) - -#define isALPHA(x) \ - (av_pix_fmt_descriptors[x].nb_components == 2 || \ +#define isRGBinInt(x) \ + ( \ + (x)==PIX_FMT_RGB48BE || \ + (x)==PIX_FMT_RGB48LE || \ + (x)==PIX_FMT_RGBA64BE || \ + (x)==PIX_FMT_RGBA64LE || \ + (x)==PIX_FMT_RGB32 || \ + (x)==PIX_FMT_RGB32_1 || \ + (x)==PIX_FMT_RGB24 || \ + (x)==PIX_FMT_RGB565BE || \ + (x)==PIX_FMT_RGB565LE || \ + (x)==PIX_FMT_RGB555BE || \ + (x)==PIX_FMT_RGB555LE || \ + (x)==PIX_FMT_RGB444BE || \ + (x)==PIX_FMT_RGB444LE || \ + (x)==PIX_FMT_RGB8 || \ + (x)==PIX_FMT_RGB4 || \ + (x)==PIX_FMT_RGB4_BYTE || \ + (x)==PIX_FMT_MONOBLACK || \ + (x)==PIX_FMT_MONOWHITE \ + ) +#define isBGRinInt(x) \ + ( \ + (x)==PIX_FMT_BGR48BE || \ + (x)==PIX_FMT_BGR48LE || \ + (x)==PIX_FMT_BGRA64BE || \ + (x)==PIX_FMT_BGRA64LE || \ + (x)==PIX_FMT_BGR32 || \ + (x)==PIX_FMT_BGR32_1 || \ + (x)==PIX_FMT_BGR24 || \ + (x)==PIX_FMT_BGR565BE || \ + (x)==PIX_FMT_BGR565LE || \ + (x)==PIX_FMT_BGR555BE || \ + (x)==PIX_FMT_BGR555LE || \ + (x)==PIX_FMT_BGR444BE || \ + (x)==PIX_FMT_BGR444LE || \ + (x)==PIX_FMT_BGR8 || \ + (x)==PIX_FMT_BGR4 || \ + (x)==PIX_FMT_BGR4_BYTE|| \ + (x)==PIX_FMT_MONOBLACK|| \ + (x)==PIX_FMT_MONOWHITE \ + ) + +#define isRGBinBytes(x) ( \ + (x)==PIX_FMT_RGB48BE \ + || (x)==PIX_FMT_RGB48LE \ + || (x)==PIX_FMT_RGBA64BE \ + || (x)==PIX_FMT_RGBA64LE \ + || (x)==PIX_FMT_RGBA \ + || (x)==PIX_FMT_ARGB \ + || (x)==PIX_FMT_RGB24 \ + ) +#define isBGRinBytes(x) ( \ + (x)==PIX_FMT_BGR48BE \ + || (x)==PIX_FMT_BGR48LE \ + || (x)==PIX_FMT_BGRA64BE \ + || (x)==PIX_FMT_BGRA64LE \ + || (x)==PIX_FMT_BGRA \ + || (x)==PIX_FMT_ABGR \ + || (x)==PIX_FMT_BGR24 \ + ) + +#define isAnyRGB(x) \ + ( \ + isRGBinInt(x) || \ + isBGRinInt(x) || \ + (x)==PIX_FMT_GBR24P \ + ) + +#define isALPHA(x) \ + (av_pix_fmt_descriptors[x].nb_components == 2 || \ av_pix_fmt_descriptors[x].nb_components == 4) +#if 1 +#define isPacked(x) ( \ + (x)==PIX_FMT_PAL8 \ + || (x)==PIX_FMT_YUYV422 \ + || (x)==PIX_FMT_UYVY422 \ + || (x)==PIX_FMT_Y400A \ + || isRGBinInt(x) \ + || isBGRinInt(x) \ + ) +#else #define isPacked(x) \ ((av_pix_fmt_descriptors[x].nb_components >= 2 && \ !(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) || \ (x) == PIX_FMT_PAL8) +#endif #define isPlanar(x) \ (av_pix_fmt_descriptors[x].nb_components >= 2 && \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) @@ -631,6 +686,9 @@ const char *sws_format_name(enum PixelFormat format); extern const uint64_t ff_dither4[2]; extern const uint64_t ff_dither8[2]; +extern const uint8_t dithers[8][8][8]; +extern const uint16_t dither_scale[15][16]; + extern const AVClass sws_context_class; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 5fe2b14..dd7b04c 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -34,48 +34,6 @@ #include "libavutil/bswap.h" #include "libavutil/pixdesc.h" -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_1)[8][8] = { - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, -}; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_3)[8][8] = { - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, -}; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_64)[8][8] = { - { 18, 34, 30, 46, 17, 33, 29, 45,}, - { 50, 2, 62, 14, 49, 1, 61, 13,}, - { 26, 42, 22, 38, 25, 41, 21, 37,}, - { 58, 10, 54, 6, 57, 9, 53, 5,}, - { 16, 32, 28, 44, 19, 35, 31, 47,}, - { 48, 0, 60, 12, 51, 3, 63, 15,}, - { 24, 40, 20, 36, 27, 43, 23, 39,}, - { 56, 8, 52, 4, 59, 11, 55, 7,}, -}; -extern const uint8_t dither_8x8_128[8][8]; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_256)[8][8] = { - { 72, 136, 120, 184, 68, 132, 116, 180,}, - { 200, 8, 248, 56, 196, 4, 244, 52,}, - { 104, 168, 88, 152, 100, 164, 84, 148,}, - { 232, 40, 216, 24, 228, 36, 212, 20,}, - { 64, 128, 102, 176, 76, 140, 124, 188,}, - { 192, 0, 240, 48, 204, 12, 252, 60,}, - { 96, 160, 80, 144, 108, 172, 92, 156,}, - { 224, 32, 208, 16, 236, 44, 220, 28,}, -}; - #define RGB2YUV_SHIFT 15 #define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) #define BV (-(int) (0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) @@ -87,6 +45,102 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_256)[8][8] = { #define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) #define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) +DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={ +{ + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, +},{ + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, +},{ + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, +},{ + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 1,}, + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 1,}, +},{ + { 9, 17, 15, 23, 8, 16, 14, 22,}, + { 25, 1, 31, 7, 24, 0, 30, 6,}, + { 13, 21, 11, 19, 12, 20, 10, 18,}, + { 29, 5, 27, 3, 28, 4, 26, 2,}, + { 8, 16, 14, 22, 9, 17, 15, 23,}, + { 24, 0, 30, 6, 25, 1, 31, 7,}, + { 12, 20, 10, 18, 13, 21, 11, 19,}, + { 28, 4, 26, 2, 29, 5, 27, 3,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 36, 68, 60, 92, 34, 66, 58, 90,}, + { 100, 4,124, 28, 98, 2,122, 26,}, + { 52, 84, 44, 76, 50, 82, 42, 74,}, + { 116, 20,108, 12,114, 18,106, 10,}, + { 32, 64, 56, 88, 38, 70, 62, 94,}, + { 96, 0,120, 24,102, 6,126, 30,}, + { 48, 80, 40, 72, 54, 86, 46, 78,}, + { 112, 16,104, 8,118, 22,110, 14,}, +}}; + +static const uint8_t flat64[8]={64,64,64,64,64,64,64,64}; + +const uint16_t dither_scale[15][16]={ +{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, +{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, +{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,}, +{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,}, +{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,}, +{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,}, +{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,}, +{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,}, +{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,}, +{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,}, +{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,}, +{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,}, +{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,}, +{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,}, +{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,}, +}; + + static void fillPlane(uint8_t *plane, int stride, int width, int height, int y, uint8_t val) { @@ -98,6 +152,20 @@ static void fillPlane(uint8_t *plane, int stride, int width, int height, int y, } } +static void fillPlane16(uint8_t *plane, int stride, int width, int height, int y, + int alpha, int bits) +{ + int i, j; + uint8_t *ptr = plane + stride * y; + int v = alpha ? -1 : (1<<bits); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + AV_WN16(ptr+2*j, v); + } + ptr += stride; + } +} + static void copyPlane(const uint8_t *src, int srcStride, int srcSliceY, int srcSliceH, int width, uint8_t *dst, int dstStride) @@ -310,7 +378,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY; const uint8_t *srcPtr = src[0]; - if (srcFormat == PIX_FMT_Y400A) { + if (srcFormat == PIX_FMT_GRAY8A) { switch (dstFormat) { case PIX_FMT_RGB32 : conv = gray8aToPacked32; break; case PIX_FMT_BGR32 : conv = gray8aToPacked32; break; @@ -332,7 +400,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], if (!conv) av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); else { for (i = 0; i < srcSliceH; i++) { conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb); @@ -344,6 +412,92 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], return srcSliceH; } +static void gbr24ptopacked24(const uint8_t* src[], int srcStride[], uint8_t* dst, int dstStride, int srcSliceH, int width) +{ + int x, h, i; + for (h = 0; h < srcSliceH; h++) { + uint8_t *dest = dst + dstStride * h; + for (x = 0; x < width; x++) { + *dest++ = src[0][x]; + *dest++ = src[1][x]; + *dest++ = src[2][x]; + } + + for (i = 0; i < 3; i++) + src[i] += srcStride[i]; + } +} + +static void gbr24ptopacked32(const uint8_t* src[], int srcStride[], uint8_t* dst, int dstStride, int srcSliceH, int alpha_first, int width) +{ + int x, h, i; + for (h = 0; h < srcSliceH; h++) { + uint8_t *dest = dst + dstStride * h; + + if (alpha_first) { + for (x = 0; x < width; x++) { + *dest++ = 0xff; + *dest++ = src[0][x]; + *dest++ = src[1][x]; + *dest++ = src[2][x]; + } + } else { + for (x = 0; x < width; x++) { + *dest++ = src[0][x]; + *dest++ = src[1][x]; + *dest++ = src[2][x]; + *dest++ = 0xff; + } + } + + for (i = 0; i < 3; i++) + src[i] += srcStride[i]; + } +} + +static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + int alpha_first = 0; + if (c->srcFormat != PIX_FMT_GBR24P) { + av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n", + av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); + return srcSliceH; + } + + switch (c->dstFormat) { + case PIX_FMT_BGR24: + gbr24ptopacked24((const uint8_t* []) {src[1], src[0], src[2]}, (int []) {srcStride[1], srcStride[0], srcStride[2]}, + dst[0] + srcSliceY * dstStride[0], dstStride[0], srcSliceH, c->srcW); + break; + + case PIX_FMT_RGB24: + gbr24ptopacked24((const uint8_t* []) {src[2], src[0], src[1]}, (int []) {srcStride[2], srcStride[0], srcStride[1]}, + dst[0] + srcSliceY * dstStride[0], dstStride[0], srcSliceH, c->srcW); + break; + + case PIX_FMT_ARGB: + alpha_first = 1; + case PIX_FMT_RGBA: + gbr24ptopacked32((const uint8_t* []) {src[2], src[0], src[1]}, (int []) {srcStride[2], srcStride[0], srcStride[1]}, + dst[0] + srcSliceY * dstStride[0], dstStride[0], srcSliceH, alpha_first, c->srcW); + break; + + case PIX_FMT_ABGR: + alpha_first = 1; + case PIX_FMT_BGRA: + gbr24ptopacked32((const uint8_t* []) {src[1], src[0], src[2]}, (int []) {srcStride[1], srcStride[0], srcStride[2]}, + dst[0] + srcSliceY * dstStride[0], dstStride[0], srcSliceH, alpha_first, c->srcW); + break; + + default: + av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n", + av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); + } + + return srcSliceH; +} + #define isRGBA32(x) ( \ (x) == PIX_FMT_ARGB \ || (x) == PIX_FMT_RGBA \ @@ -442,7 +596,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], if (!conv) { av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); } else { const uint8_t *srcPtr = src[0]; uint8_t *dstPtr = dst[0]; @@ -532,25 +686,25 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[], return srcSliceH; } -#define clip9(x) av_clip_uintp2(x, 9) -#define clip10(x) av_clip_uintp2(x, 10) -#define DITHER_COPY(dst, dstStride, wfunc, src, srcStride, rfunc, dithers, shift, clip) \ - for (i = 0; i < height; i++) { \ - const uint8_t *dither = dithers[i & 7]; \ - for (j = 0; j < length - 7; j += 8) { \ - wfunc(&dst[j + 0], clip((rfunc(&src[j + 0]) + dither[0]) >> shift)); \ - wfunc(&dst[j + 1], clip((rfunc(&src[j + 1]) + dither[1]) >> shift)); \ - wfunc(&dst[j + 2], clip((rfunc(&src[j + 2]) + dither[2]) >> shift)); \ - wfunc(&dst[j + 3], clip((rfunc(&src[j + 3]) + dither[3]) >> shift)); \ - wfunc(&dst[j + 4], clip((rfunc(&src[j + 4]) + dither[4]) >> shift)); \ - wfunc(&dst[j + 5], clip((rfunc(&src[j + 5]) + dither[5]) >> shift)); \ - wfunc(&dst[j + 6], clip((rfunc(&src[j + 6]) + dither[6]) >> shift)); \ - wfunc(&dst[j + 7], clip((rfunc(&src[j + 7]) + dither[7]) >> shift)); \ - } \ - for (; j < length; j++) \ - wfunc(&dst[j], (rfunc(&src[j]) + dither[j & 7]) >> shift); \ - dst += dstStride; \ - src += srcStride; \ +#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ + uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ + int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ + for (i = 0; i < height; i++) {\ + const uint8_t *dither= dithers[src_depth-9][i&7];\ + for (j = 0; j < length-7; j+=8){\ + dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\ + dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\ + dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\ + dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\ + dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\ + dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\ + dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\ + dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\ + }\ + for (; j < length; j++)\ + dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\ + dst += dstStride;\ + src += srcStride;\ } static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], @@ -564,162 +718,99 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], int height = (plane == 0 || plane == 3) ? srcSliceH: -((-srcSliceH) >> c->chrDstVSubSample); const uint8_t *srcPtr = src[plane]; uint8_t *dstPtr = dst[plane] + dstStride[plane] * y; + int shiftonly= plane==1 || plane==2 || (!c->srcRange && plane==0); if (!dst[plane]) continue; // ignore palette for GRAY8 if (plane == 1 && !dst[2]) continue; if (!src[plane] || (plane == 1 && !src[2])) { - if (is16BPS(c->dstFormat)) - length *= 2; - fillPlane(dst[plane], dstStride[plane], length, height, y, - (plane == 3) ? 255 : 128); + if (is16BPS(c->dstFormat) || isNBPS(c->dstFormat)) { + fillPlane16(dst[plane], dstStride[plane], length, height, y, + plane == 3, av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1); + } else { + fillPlane(dst[plane], dstStride[plane], length, height, y, + (plane == 3) ? 255 : 128); + } } else { - if (is9_OR_10BPS(c->srcFormat)) { + if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat) + || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat)) + ) { const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1 + 1; const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1 + 1; const uint16_t *srcPtr2 = (const uint16_t *) srcPtr; + uint16_t *dstPtr2 = (uint16_t*)dstPtr; - if (is16BPS(c->dstFormat)) { - uint16_t *dstPtr2 = (uint16_t *) dstPtr; -#define COPY9_OR_10TO16(rfunc, wfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - int srcpx = rfunc(&srcPtr2[j]); \ - wfunc(&dstPtr2[j], (srcpx << (16 - src_depth)) | (srcpx >> (2 * src_depth - 16))); \ - } \ - dstPtr2 += dstStride[plane] / 2; \ - srcPtr2 += srcStride[plane] / 2; \ + if (dst_depth == 8) { + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , ) + } else { + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, ) } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO16(AV_RB16, AV_WB16); - } else { - COPY9_OR_10TO16(AV_RL16, AV_WB16); + } else if (src_depth == 8) { + for (i = 0; i < height; i++) { + #define COPY816(w)\ + if(shiftonly){\ + for (j = 0; j < length; j++)\ + w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\ + }else{\ + for (j = 0; j < length; j++)\ + w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\ + (srcPtr[j]>>(2*8-dst_depth)));\ } - } else { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO16(AV_RB16, AV_WL16); + if(isBE(c->dstFormat)){ + COPY816(AV_WB16) } else { - COPY9_OR_10TO16(AV_RL16, AV_WL16); + COPY816(AV_WL16) } + dstPtr2 += dstStride[plane]/2; + srcPtr += srcStride[plane]; } - } else if (is9_OR_10BPS(c->dstFormat)) { - uint16_t *dstPtr2 = (uint16_t *) dstPtr; -#define COPY9_OR_10TO9_OR_10(loop) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - loop; \ - } \ - dstPtr2 += dstStride[plane] / 2; \ - srcPtr2 += srcStride[plane] / 2; \ - } -#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \ - if (dst_depth > src_depth) { \ - COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \ - wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \ - } else if (dst_depth < src_depth) { \ - DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_1, 1, clip9); \ - } else { \ - COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \ - } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16); - } else { - COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16); - } - } else { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16); + } else if (src_depth <= dst_depth) { + for (i = 0; i < height; i++) { +#define COPY_UP(r,w) \ + if(shiftonly){\ + for (j = 0; j < length; j++){ \ + unsigned int v= r(&srcPtr2[j]);\ + w(&dstPtr2[j], v<<(dst_depth-src_depth));\ + }\ + }else{\ + for (j = 0; j < length; j++){ \ + unsigned int v= r(&srcPtr2[j]);\ + w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ + (v>>(2*src_depth-dst_depth)));\ + }\ + } + if(isBE(c->srcFormat)){ + if(isBE(c->dstFormat)){ + COPY_UP(AV_RB16, AV_WB16) + } else { + COPY_UP(AV_RB16, AV_WL16) + } } else { - COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16); + if(isBE(c->dstFormat)){ + COPY_UP(AV_RL16, AV_WB16) + } else { + COPY_UP(AV_RL16, AV_WL16) + } } + dstPtr2 += dstStride[plane]/2; + srcPtr2 += srcStride[plane]/2; } } else { -#define W8(a, b) { *(a) = (b); } -#define COPY9_OR_10TO8(rfunc) \ - if (src_depth == 9) { \ - DITHER_COPY(dstPtr, dstStride[plane], W8, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_1, 1, av_clip_uint8); \ - } else { \ - DITHER_COPY(dstPtr, dstStride[plane], W8, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_3, 2, av_clip_uint8); \ - } - if (isBE(c->srcFormat)) { - COPY9_OR_10TO8(AV_RB16); - } else { - COPY9_OR_10TO8(AV_RL16); - } - } - } else if (is9_OR_10BPS(c->dstFormat)) { - const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1 + 1; - uint16_t *dstPtr2 = (uint16_t *) dstPtr; - - if (is16BPS(c->srcFormat)) { - const uint16_t *srcPtr2 = (const uint16_t *) srcPtr; -#define COPY16TO9_OR_10(rfunc, wfunc) \ - if (dst_depth == 9) { \ - DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_128, 7, clip9); \ - } else { \ - DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_64, 6, clip10); \ - } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY16TO9_OR_10(AV_RB16, AV_WB16); + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , ) } else { - COPY16TO9_OR_10(AV_RL16, AV_WB16); + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16) } - } else { - if (isBE(c->srcFormat)) { - COPY16TO9_OR_10(AV_RB16, AV_WL16); + }else{ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, ) } else { - COPY16TO9_OR_10(AV_RL16, AV_WL16); + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16) } } - } else /* 8bit */ { -#define COPY8TO9_OR_10(wfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - const int srcpx = srcPtr[j]; \ - wfunc(&dstPtr2[j], (srcpx << (dst_depth - 8)) | (srcpx >> (16 - dst_depth))); \ - } \ - dstPtr2 += dstStride[plane] / 2; \ - srcPtr += srcStride[plane]; \ - } - if (isBE(c->dstFormat)) { - COPY8TO9_OR_10(AV_WB16); - } else { - COPY8TO9_OR_10(AV_WL16); - } - } - } else if (is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) { - const uint16_t *srcPtr2 = (const uint16_t *) srcPtr; -#define COPY16TO8(rfunc) \ - DITHER_COPY(dstPtr, dstStride[plane], W8, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_256, 8, av_clip_uint8); - if (isBE(c->srcFormat)) { - COPY16TO8(AV_RB16); - } else { - COPY16TO8(AV_RL16); - } - } else if (!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) { - for (i = 0; i < height; i++) { - for (j = 0; j < length; j++) { - dstPtr[ j << 1 ] = srcPtr[j]; - dstPtr[(j << 1) + 1] = srcPtr[j]; - } - srcPtr += srcStride[plane]; - dstPtr += dstStride[plane]; } } else if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat) && isBE(c->srcFormat) != isBE(c->dstFormat)) { @@ -797,6 +888,17 @@ void ff_get_unscaled_swscale(SwsContext *c) && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) c->swScale= rgbToRgbWrapper; +#define isByteRGB(f) (\ + f == PIX_FMT_RGB32 ||\ + f == PIX_FMT_RGB32_1 ||\ + f == PIX_FMT_RGB24 ||\ + f == PIX_FMT_BGR32 ||\ + f == PIX_FMT_BGR32_1 ||\ + f == PIX_FMT_BGR24) + + if (isAnyRGB(srcFormat) && isPlanar(srcFormat) && isByteRGB(dstFormat)) + c->swScale= planarRgbToRgbWrapper; + /* bswap 16 bits per pixel/component packed formats */ if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGR444) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGR48) || @@ -809,13 +911,7 @@ void ff_get_unscaled_swscale(SwsContext *c) IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGB565)) c->swScale = packed_16bpc_bswap; - if ((usePal(srcFormat) && ( - dstFormat == PIX_FMT_RGB32 || - dstFormat == PIX_FMT_RGB32_1 || - dstFormat == PIX_FMT_RGB24 || - dstFormat == PIX_FMT_BGR32 || - dstFormat == PIX_FMT_BGR32_1 || - dstFormat == PIX_FMT_BGR24))) + if (usePal(srcFormat) && isByteRGB(dstFormat)) c->swScale = palToRgbWrapper; if (srcFormat == PIX_FMT_YUV422P) { @@ -846,13 +942,14 @@ void ff_get_unscaled_swscale(SwsContext *c) if (srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P) c->swScale = uyvyToYuv422Wrapper; +#define isPlanarGray(x) (isGray(x) && (x) != PIX_FMT_GRAY8A) /* simple copy */ if ( srcFormat == dstFormat || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P) || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P) || - (isPlanarYUV(srcFormat) && isGray(dstFormat)) || - (isPlanarYUV(dstFormat) && isGray(srcFormat)) || - (isGray(dstFormat) && isGray(srcFormat)) || + (isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) || + (isPlanarYUV(dstFormat) && isPlanarGray(srcFormat)) || + (isPlanarGray(dstFormat) && isPlanarGray(srcFormat)) || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) && c->chrDstHSubSample == c->chrSrcHSubSample && c->chrDstVSubSample == c->chrSrcVSubSample && @@ -883,7 +980,7 @@ static void reset_ptr(const uint8_t *src[], int format) } } -static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, +static int check_image_pointers(const uint8_t * const data[4], enum PixelFormat pix_fmt, const int linesizes[4]) { const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt]; @@ -908,9 +1005,10 @@ int attribute_align_arg sws_scale(struct SwsContext *c, int srcSliceH, uint8_t *const dst[], const int dstStride[]) { - int i; + int i, ret; const uint8_t *src2[4] = { srcSlice[0], srcSlice[1], srcSlice[2], srcSlice[3] }; uint8_t *dst2[4] = { dst[0], dst[1], dst[2], dst[3] }; + uint8_t *rgb0_tmp = NULL; // do not mess up sliceDir if we have a "trailing" 0-size slice if (srcSliceH == 0) @@ -920,7 +1018,7 @@ int attribute_align_arg sws_scale(struct SwsContext *c, av_log(c, AV_LOG_ERROR, "bad src image pointers\n"); return 0; } - if (!check_image_pointers(dst, c->dstFormat, dstStride)) { + if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) { av_log(c, AV_LOG_ERROR, "bad dst image pointers\n"); return 0; } @@ -935,9 +1033,10 @@ int attribute_align_arg sws_scale(struct SwsContext *c, if (usePal(c->srcFormat)) { for (i = 0; i < 256; i++) { - int p, r, g, b, y, u, v; + int p, r, g, b, y, u, v, a = 0xff; if (c->srcFormat == PIX_FMT_PAL8) { p = ((const uint32_t *)(srcSlice[1]))[i]; + a = (p >> 24) & 0xFF; r = (p >> 16) & 0xFF; g = (p >> 8) & 0xFF; b = p & 0xFF; @@ -953,8 +1052,7 @@ int attribute_align_arg sws_scale(struct SwsContext *c, r = ( i >> 3 ) * 255; g = ((i >> 1) & 3) * 85; b = ( i & 1) * 255; - } else if (c->srcFormat == PIX_FMT_GRAY8 || - c->srcFormat == PIX_FMT_Y400A) { + } else if (c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_GRAY8A) { r = g = b = i; } else { assert(c->srcFormat == PIX_FMT_BGR4_BYTE); @@ -965,37 +1063,51 @@ int attribute_align_arg sws_scale(struct SwsContext *c, y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); - c->pal_yuv[i] = y + (u << 8) + (v << 16); + c->pal_yuv[i]= y + (u<<8) + (v<<16) + (a<<24); switch (c->dstFormat) { case PIX_FMT_BGR32: #if !HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i] = r + (g << 8) + (b << 16); + c->pal_rgb[i]= r + (g<<8) + (b<<16) + (a<<24); break; case PIX_FMT_BGR32_1: #if HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif - c->pal_rgb[i] = (r + (g << 8) + (b << 16)) << 8; + c->pal_rgb[i]= a + (r<<8) + (g<<16) + (b<<24); break; case PIX_FMT_RGB32_1: #if HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i] = (b + (g << 8) + (r << 16)) << 8; + c->pal_rgb[i]= a + (b<<8) + (g<<16) + (r<<24); break; case PIX_FMT_RGB32: #if !HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif default: - c->pal_rgb[i] = b + (g << 8) + (r << 16); + c->pal_rgb[i]= b + (g<<8) + (r<<16) + (a<<24); } } } + if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) { + uint8_t *base; + int x,y; + rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32); + base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp; + for (y=0; y<srcSliceH; y++){ + memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW); + for (x=c->src0Alpha-1; x<4*c->srcW; x+=4) { + base[ srcStride[0]*y + x] = 0xFF; + } + } + src2[0] = base; + } + // copy strides, so they can safely be modified if (c->sliceDir == 1) { // slices go from top to bottom @@ -1005,13 +1117,13 @@ int attribute_align_arg sws_scale(struct SwsContext *c, dstStride[3] }; reset_ptr(src2, c->srcFormat); - reset_ptr((const uint8_t **) dst2, c->dstFormat); + reset_ptr((void*)dst2, c->dstFormat); /* reset slice direction at end of frame */ if (srcSliceY + srcSliceH == c->srcH) c->sliceDir = 0; - return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, + ret = c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2); } else { // slices go from bottom to top => we flip the image internally @@ -1031,15 +1143,18 @@ int attribute_align_arg sws_scale(struct SwsContext *c, dst2[3] += ( c->dstH - 1) * dstStride[3]; reset_ptr(src2, c->srcFormat); - reset_ptr((const uint8_t **) dst2, c->dstFormat); + reset_ptr((void*)dst2, c->dstFormat); /* reset slice direction at end of frame */ if (!srcSliceY) c->sliceDir = 0; - return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, + ret = c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2); } + + av_free(rgb0_tmp); + return ret; } /* Convert the palette to the same packed 32-bit format as the palette */ diff --git a/libswscale/utils.c b/libswscale/utils.c index 2fe9c5b..70b724e 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -1,24 +1,25 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _SVID_SOURCE //needed for MAP_ANONYMOUS +#define _DARWIN_C_SOURCE // needed for MAP_ANON #include <inttypes.h> #include <string.h> #include <math.h> @@ -46,21 +47,23 @@ #include "libavutil/mathematics.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" +#include "libavutil/avassert.h" unsigned swscale_version(void) { + av_assert0(LIBSWSCALE_VERSION_MICRO >= 100); return LIBSWSCALE_VERSION_INT; } const char *swscale_configuration(void) { - return LIBAV_CONFIGURATION; + return FFMPEG_CONFIGURATION; } const char *swscale_license(void) { #define LICENSE_PREFIX "libswscale license: " - return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1; + return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; } #define RET 0xC3 //near return opcode for x86 @@ -99,13 +102,20 @@ static const FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_RGBA] = { 1 , 1 }, [PIX_FMT_ABGR] = { 1 , 1 }, [PIX_FMT_BGRA] = { 1 , 1 }, + [PIX_FMT_0RGB] = { 1 , 1 }, + [PIX_FMT_RGB0] = { 1 , 1 }, + [PIX_FMT_0BGR] = { 1 , 1 }, + [PIX_FMT_BGR0] = { 1 , 1 }, [PIX_FMT_GRAY16BE] = { 1 , 1 }, [PIX_FMT_GRAY16LE] = { 1 , 1 }, [PIX_FMT_YUV440P] = { 1 , 1 }, [PIX_FMT_YUVJ440P] = { 1 , 1 }, [PIX_FMT_YUVA420P] = { 1 , 1 }, + [PIX_FMT_YUVA444P] = { 1 , 1 }, [PIX_FMT_RGB48BE] = { 1 , 1 }, [PIX_FMT_RGB48LE] = { 1 , 1 }, + [PIX_FMT_RGBA64BE] = { 0 , 0 }, + [PIX_FMT_RGBA64LE] = { 0 , 0 }, [PIX_FMT_RGB565BE] = { 1 , 1 }, [PIX_FMT_RGB565LE] = { 1 , 1 }, [PIX_FMT_RGB555BE] = { 1 , 1 }, @@ -127,6 +137,8 @@ static const FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_Y400A] = { 1 , 0 }, [PIX_FMT_BGR48BE] = { 1 , 1 }, [PIX_FMT_BGR48LE] = { 1 , 1 }, + [PIX_FMT_BGRA64BE] = { 0 , 0 }, + [PIX_FMT_BGRA64LE] = { 0 , 0 }, [PIX_FMT_YUV420P9BE] = { 1 , 1 }, [PIX_FMT_YUV420P9LE] = { 1 , 1 }, [PIX_FMT_YUV420P10BE] = { 1 , 1 }, @@ -139,6 +151,7 @@ static const FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_YUV444P9LE] = { 1 , 1 }, [PIX_FMT_YUV444P10BE] = { 1 , 1 }, [PIX_FMT_YUV444P10LE] = { 1 , 1 }, + [PIX_FMT_GBR24P] = { 1 , 0 }, [PIX_FMT_GBRP] = { 1 , 0 }, [PIX_FMT_GBRP9LE] = { 1 , 0 }, [PIX_FMT_GBRP9BE] = { 1 , 0 }, @@ -162,13 +175,12 @@ int sws_isSupportedOutput(enum PixelFormat pix_fmt) extern const int32_t ff_yuv2rgb_coeffs[8][4]; +#if FF_API_SWS_FORMAT_NAME const char *sws_format_name(enum PixelFormat format) { - if ((unsigned)format < PIX_FMT_NB && av_pix_fmt_descriptors[format].name) - return av_pix_fmt_descriptors[format].name; - else - return "Unknown format"; + return av_get_pix_fmt_name(format); } +#endif static double getSplineCoeff(double a, double b, double c, double d, double dist) { @@ -182,7 +194,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, - SwsVector *srcFilter, SwsVector *dstFilter, double param[2], int is_horizontal) + SwsVector *srcFilter, SwsVector *dstFilter, double param[2]) { int i; int filterSize; @@ -210,7 +222,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi } else if (flags&SWS_POINT) { // lame looking point sampling mode int i; - int xDstInSrc; + int64_t xDstInSrc; filterSize= 1; FF_ALLOC_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail); @@ -224,7 +236,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi } } else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) { // bilinear upscale int i; - int xDstInSrc; + int64_t xDstInSrc; filterSize= 2; FF_ALLOC_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail); @@ -236,7 +248,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi (*filterPos)[i]= xx; //bilinear upscale / linear interpolate / area averaging for (j=0; j<filterSize; j++) { - int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16); + int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16); if (coeff<0) coeff=0; filter[i*filterSize + j]= coeff; xx++; @@ -244,7 +256,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi xDstInSrc+= xInc; } } else { - int xDstInSrc; + int64_t xDstInSrc; int sizeFactor; if (flags&SWS_BICUBIC) sizeFactor= 4; @@ -273,7 +285,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi int j; (*filterPos)[i]= xx; for (j=0; j<filterSize; j++) { - int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13; + int64_t d= (FFABS(((int64_t)xx<<17) - xDstInSrc))<<13; double floatd; int64_t coeff; @@ -459,29 +471,27 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi //FIXME try to align filterPos if possible //fix borders - if (is_horizontal) { - for (i = 0; i < dstW; i++) { - int j; - if ((*filterPos)[i] < 0) { - // move filter coefficients left to compensate for filterPos - for (j = 1; j < filterSize; j++) { - int left = FFMAX(j + (*filterPos)[i], 0); - filter[i * filterSize + left] += filter[i * filterSize + j]; - filter[i * filterSize + j ] = 0; - } - (*filterPos)[i] = 0; + for (i=0; i<dstW; i++) { + int j; + if ((*filterPos)[i] < 0) { + // move filter coefficients left to compensate for filterPos + for (j=1; j<filterSize; j++) { + int left= FFMAX(j + (*filterPos)[i], 0); + filter[i*filterSize + left] += filter[i*filterSize + j]; + filter[i*filterSize + j]=0; } + (*filterPos)[i]= 0; + } - if ((*filterPos)[i] + filterSize > srcW) { - int shift = (*filterPos)[i] + filterSize - srcW; - // move filter coefficients right to compensate for filterPos - for (j = filterSize - 2; j >= 0; j--) { - int right = FFMIN(j + shift, filterSize - 1); - filter[i * filterSize + right] += filter[i * filterSize + j]; - filter[i * filterSize + j ] = 0; - } - (*filterPos)[i] = srcW - filterSize; + if ((*filterPos)[i] + filterSize > srcW) { + int shift= (*filterPos)[i] + filterSize - srcW; + // move filter coefficients right to compensate for filterPos + for (j=filterSize-2; j>=0; j--) { + int right= FFMIN(j + shift, filterSize-1); + filter[i*filterSize +right] += filter[i*filterSize +j]; + filter[i*filterSize +j]=0; } + (*filterPos)[i]= srcW - filterSize; } } @@ -719,7 +729,7 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation) { - if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; + if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; *inv_table = c->srcColorspaceTable; *table = c->dstColorspaceTable; @@ -743,6 +753,17 @@ static int handle_jpeg(enum PixelFormat *format) } } +static int handle_0alpha(enum PixelFormat *format) +{ + switch (*format) { + case PIX_FMT_0BGR : *format = PIX_FMT_ABGR ; return 1; + case PIX_FMT_BGR0 : *format = PIX_FMT_BGRA ; return 4; + case PIX_FMT_0RGB : *format = PIX_FMT_ARGB ; return 1; + case PIX_FMT_RGB0 : *format = PIX_FMT_RGBA ; return 4; + default: return 0; + } +} + SwsContext *sws_alloc_context(void) { SwsContext *c= av_mallocz(sizeof(SwsContext)); @@ -755,7 +776,7 @@ SwsContext *sws_alloc_context(void) int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) { - int i; + int i, j; int usesVFilter, usesHFilter; int unscaled; SwsFilter dummyFilter= {NULL, NULL, NULL, NULL}; @@ -763,7 +784,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) int srcH= c->srcH; int dstW= c->dstW; int dstH= c->dstH; - int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 16, 16), dst_stride_px = dst_stride >> 1; + int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16); int flags, cpu_flags; enum PixelFormat srcFormat= c->srcFormat; enum PixelFormat dstFormat= c->dstFormat; @@ -775,12 +796,23 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) unscaled = (srcW == dstW && srcH == dstH); + handle_jpeg(&srcFormat); + handle_jpeg(&dstFormat); + handle_0alpha(&srcFormat); + handle_0alpha(&dstFormat); + + if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat){ + av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n"); + c->srcFormat= srcFormat; + c->dstFormat= dstFormat; + } + if (!sws_isSupportedInput(srcFormat)) { - av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", sws_format_name(srcFormat)); + av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", av_get_pix_fmt_name(srcFormat)); return AVERROR(EINVAL); } if (!sws_isSupportedOutput(dstFormat)) { - av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", sws_format_name(dstFormat)); + av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", av_get_pix_fmt_name(dstFormat)); return AVERROR(EINVAL); } @@ -796,7 +828,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |SWS_SPLINE |SWS_BICUBLIN); if(!i || (i & (i-1))) { - av_log(c, AV_LOG_ERROR, "Exactly one scaler algorithm must be chosen\n"); + av_log(c, AV_LOG_ERROR, "Exactly one scaler algorithm must be chosen, got %X\n", i); return AVERROR(EINVAL); } /* sanity check */ @@ -828,21 +860,14 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat); // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation - if (flags & SWS_FULL_CHR_H_INT && - isAnyRGB(dstFormat) && - dstFormat != PIX_FMT_RGBA && - dstFormat != PIX_FMT_ARGB && - dstFormat != PIX_FMT_BGRA && - dstFormat != PIX_FMT_ABGR && - dstFormat != PIX_FMT_RGB24 && - dstFormat != PIX_FMT_BGR24) { - av_log(c, AV_LOG_ERROR, - "full chroma interpolation for destination format '%s' not yet implemented\n", - sws_format_name(dstFormat)); - flags &= ~SWS_FULL_CHR_H_INT; - c->flags = flags; + if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) { + if (dstW&1) { + av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n"); + flags |= SWS_FULL_CHR_H_INT; + c->flags = flags; + } else + c->chrDstHSubSample = 1; } - if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1; // drop some chroma lines if the user wants it c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT; @@ -869,7 +894,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) if (c->swScale) { if (flags&SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); return 0; } } @@ -880,18 +905,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1; if (c->dstBpc < 8) c->dstBpc = 8; + if (isAnyRGB(srcFormat) || srcFormat == PIX_FMT_PAL8) + c->srcBpc = 16; if (c->dstBpc == 16) dst_stride <<= 1; - FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, - (FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16, - fail); + FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->srcBpc == 8 && c->dstBpc <= 10) { c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { if (flags&SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n"); } - if (usesHFilter) c->canMMX2BeUsed=0; + if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) c->canMMX2BeUsed=0; } else c->canMMX2BeUsed=0; @@ -911,7 +936,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->chrXInc+= 20; } //we don't use the x86 asm scaler if MMX is available - else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { + else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX && c->dstBpc <= 10) { c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20; } @@ -936,15 +961,22 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->chrMmx2FilterCode = av_malloc(c->chrMmx2FilterCodeSize); #endif +#ifdef MAP_ANONYMOUS + if (c->lumMmx2FilterCode == MAP_FAILED || c->chrMmx2FilterCode == MAP_FAILED) +#else if (!c->lumMmx2FilterCode || !c->chrMmx2FilterCode) +#endif + { + av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n"); return AVERROR(ENOMEM); + } FF_ALLOCZ_OR_GOTO(c, c->hLumFilter , (dstW /8+8)*sizeof(int16_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilter , (c->chrDstW /4+8)*sizeof(int16_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW /2/8+8)*sizeof(int32_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW/2/4+8)*sizeof(int32_t), fail); - initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode, c->hLumFilter, c->hLumFilterPos, 8); - initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode, c->hChrFilter, c->hChrFilterPos, 4); + initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode, c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8); + initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode, c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4); #ifdef MAP_ANONYMOUS mprotect(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, PROT_EXEC | PROT_READ); @@ -961,12 +993,12 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) if (initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, srcW , dstW, filterAlign, 1<<14, (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags, - srcFilter->lumH, dstFilter->lumH, c->param, 1) < 0) + srcFilter->lumH, dstFilter->lumH, c->param) < 0) goto fail; if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, c->chrSrcW, c->chrDstW, filterAlign, 1<<14, (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags, - srcFilter->chrH, dstFilter->chrH, c->param, 1) < 0) + srcFilter->chrH, dstFilter->chrH, c->param) < 0) goto fail; } } // initialize horizontal stuff @@ -981,12 +1013,12 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, srcH , dstH, filterAlign, (1<<12), (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags, - srcFilter->lumV, dstFilter->lumV, c->param, 0) < 0) + srcFilter->lumV, dstFilter->lumV, c->param) < 0) goto fail; if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, c->chrSrcH, c->chrDstH, filterAlign, (1<<12), (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags, - srcFilter->chrV, dstFilter->chrV, c->param, 0) < 0) + srcFilter->chrV, dstFilter->chrV, c->param) < 0) goto fail; #if HAVE_ALTIVEC @@ -1013,7 +1045,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->vLumBufSize= c->vLumFilterSize; c->vChrBufSize= c->vChrFilterSize; for (i=0; i<dstH; i++) { - int chrI= i*c->chrDstH / dstH; + int chrI= (int64_t)i*c->chrDstH / dstH; int nextSlice= FFMAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1, ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample)); @@ -1027,20 +1059,20 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) // allocate pixbufs (we use dynamic allocation because otherwise we would need to // allocate several megabytes to handle all possible cases) - FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize*3*sizeof(int16_t*), fail); - FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize*3*sizeof(int16_t*), fail); - FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize*3*sizeof(int16_t*), fail); + FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail); + FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); + FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) - FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize*3*sizeof(int16_t*), fail); + FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail); //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000) /* align at 16 bytes for AltiVec */ for (i=0; i<c->vLumBufSize; i++) { FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+16, fail); c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; } - // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate) - c->uv_off_px = dst_stride_px + 64 / (c->dstBpc &~ 7); - c->uv_off_byte = dst_stride + 16; + // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) + c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7); + c->uv_offx2 = dst_stride + 16; for (i=0; i<c->vChrBufSize; i++) { FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail); c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize]; @@ -1054,7 +1086,13 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) //try to avoid drawing green stuff between the right end and the stride end for (i=0; i<c->vChrBufSize; i++) - memset(c->chrUPixBuf[i], 64, dst_stride*2+1); + if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){ + av_assert0(c->dstBpc > 10); + for(j=0; j<dst_stride/2+1; j++) + ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18; + } else + for(j=0; j<dst_stride+1; j++) + ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14; assert(c->chrDstH <= dstH); @@ -1073,7 +1111,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) else av_log(c, AV_LOG_INFO, "ehh flags invalid?! "); av_log(c, AV_LOG_INFO, "from %s to %s%s ", - sws_format_name(srcFormat), + av_get_pix_fmt_name(srcFormat), #ifdef DITHER1XBPP dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 || dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE || @@ -1081,7 +1119,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) #else "", #endif - sws_format_name(dstFormat)); + av_get_pix_fmt_name(dstFormat)); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n"); else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n"); @@ -1119,6 +1157,8 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, c->dstH= dstH; c->srcRange = handle_jpeg(&srcFormat); c->dstRange = handle_jpeg(&dstFormat); + c->src0Alpha = handle_0alpha(&srcFormat); + c->dst0Alpha = handle_0alpha(&dstFormat); c->srcFormat= srcFormat; c->dstFormat= dstFormat; @@ -1482,7 +1522,7 @@ void sws_freeContext(SwsContext *c) #endif /* HAVE_MMX */ av_freep(&c->yuvTable); - av_free(c->formatConvBuffer); + av_freep(&c->formatConvBuffer); av_free(c); } @@ -1517,10 +1557,12 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, context->srcW = srcW; context->srcH = srcH; context->srcRange = handle_jpeg(&srcFormat); + context->src0Alpha = handle_0alpha(&srcFormat); context->srcFormat = srcFormat; context->dstW = dstW; context->dstH = dstH; context->dstRange = handle_jpeg(&dstFormat); + context->dst0Alpha = handle_0alpha(&dstFormat); context->dstFormat = dstFormat; context->flags = flags; context->param[0] = param[0]; diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm index 66d8845..c4174ee 100644 --- a/libswscale/x86/input.asm +++ b/libswscale/x86/input.asm @@ -36,8 +36,8 @@ SECTION_RODATA %define GV 0xD0E3 %define BV 0xF6E4 -rgb_Yrnd: times 4 dd 0x84000 ; 16.5 << 15 -rgb_UVrnd: times 4 dd 0x404000 ; 128.5 << 15 +rgb_Yrnd: times 4 dd 0x80100 ; 16.5 << 15 +rgb_UVrnd: times 4 dd 0x400100 ; 128.5 << 15 bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY @@ -83,7 +83,7 @@ SECTION .text ; %1 = nr. of XMM registers ; %2 = rgb or bgr %macro RGB24_TO_Y_FN 2-3 -cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w +cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, u3 %if mmsize == 8 mova m5, [%2_Ycoeff_12x4] mova m6, [%2_Ycoeff_3x56] @@ -115,6 +115,7 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w %if ARCH_X86_64 movsxd wq, wd %endif + add wq, wq add dstq, wq neg wq %if notcpuflag(ssse3) @@ -158,12 +159,11 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w paddd m2, m3 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[4-7] paddd m0, m4 ; += rgb_Yrnd, i.e. (dword) { Y[0-3] } paddd m2, m4 ; += rgb_Yrnd, i.e. (dword) { Y[4-7] } - psrad m0, 15 - psrad m2, 15 + psrad m0, 9 + psrad m2, 9 packssdw m0, m2 ; (word) { Y[0-7] } - packuswb m0, m0 ; (byte) { Y[0-7] } - movh [dstq+wq], m0 - add wq, mmsize / 2 + mova [dstq+wq], m0 + add wq, mmsize jl .loop REP_RET %endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8 @@ -172,7 +172,7 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w ; %1 = nr. of XMM registers ; %2 = rgb or bgr %macro RGB24_TO_UV_FN 2-3 -cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w +cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3 %if ARCH_X86_64 mova m8, [%2_Ucoeff_12x4] mova m9, [%2_Ucoeff_3x56] @@ -203,10 +203,11 @@ cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w %endif ; x86-32/64 %endif ; cpuflag(ssse3) %if ARCH_X86_64 - movsxd wq, dword r4m + movsxd wq, dword r5m %else ; x86-32 - mov wq, r4m + mov wq, r5m %endif + add wq, wq add dstUq, wq add dstVq, wq neg wq @@ -264,23 +265,20 @@ cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w paddd m2, m6 ; += rgb_UVrnd, i.e. (dword) { V[0-3] } paddd m1, m6 ; += rgb_UVrnd, i.e. (dword) { U[4-7] } paddd m4, m6 ; += rgb_UVrnd, i.e. (dword) { V[4-7] } - psrad m0, 15 - psrad m2, 15 - psrad m1, 15 - psrad m4, 15 + psrad m0, 9 + psrad m2, 9 + psrad m1, 9 + psrad m4, 9 packssdw m0, m1 ; (word) { U[0-7] } packssdw m2, m4 ; (word) { V[0-7] } %if mmsize == 8 - packuswb m0, m0 ; (byte) { U[0-3] } - packuswb m2, m2 ; (byte) { V[0-3] } - movh [dstUq+wq], m0 - movh [dstVq+wq], m2 + mova [dstUq+wq], m0 + mova [dstVq+wq], m2 %else ; mmsize == 16 - packuswb m0, m2 ; (byte) { U[0-7], V[0-7] } - movh [dstUq+wq], m0 - movhps [dstVq+wq], m0 + mova [dstUq+wq], m0 + mova [dstVq+wq], m2 %endif ; mmsize == 8/16 - add wq, mmsize / 2 + add wq, mmsize jl .loop REP_RET %endif ; ARCH_X86_64 && %0 == 3 @@ -306,13 +304,15 @@ RGB24_FUNCS 10, 12 INIT_XMM ssse3 RGB24_FUNCS 11, 13 +%if HAVE_AVX INIT_XMM avx RGB24_FUNCS 11, 13 +%endif ; %1 = nr. of XMM registers ; %2-5 = rgba, bgra, argb or abgr (in individual characters) %macro RGB32_TO_Y_FN 5-6 -cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w +cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3 mova m5, [rgba_Ycoeff_%2%4] mova m6, [rgba_Ycoeff_%3%5] %if %0 == 6 @@ -323,6 +323,7 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w movsxd wq, wd %endif lea srcq, [srcq+wq*4] + add wq, wq add dstq, wq neg wq mova m4, [rgb_Yrnd] @@ -330,8 +331,8 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w psrlw m7, 8 ; (word) { 0x00ff } x4 .loop: ; FIXME check alignment and use mova - movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] - movu m2, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] + movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] + movu m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7] pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3] pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3] @@ -341,12 +342,11 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w paddd m2, m4 ; += rgb_Yrnd paddd m0, m1 ; (dword) { Y[0-3] } paddd m2, m3 ; (dword) { Y[4-7] } - psrad m0, 15 - psrad m2, 15 + psrad m0, 9 + psrad m2, 9 packssdw m0, m2 ; (word) { Y[0-7] } - packuswb m0, m0 ; (byte) { Y[0-7] } - movh [dstq+wq], m0 - add wq, mmsize / 2 + mova [dstq+wq], m0 + add wq, mmsize jl .loop REP_RET %endif ; %0 == 3 @@ -355,7 +355,7 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w ; %1 = nr. of XMM registers ; %2-5 = rgba, bgra, argb or abgr (in individual characters) %macro RGB32_TO_UV_FN 5-6 -cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w +cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3 %if ARCH_X86_64 mova m8, [rgba_Ucoeff_%2%4] mova m9, [rgba_Ucoeff_%3%5] @@ -376,21 +376,22 @@ cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w %else ; ARCH_X86_64 && %0 == 6 .body: %if ARCH_X86_64 - movsxd wq, dword r4m + movsxd wq, dword r5m %else ; x86-32 - mov wq, r4m + mov wq, r5m %endif + add wq, wq add dstUq, wq add dstVq, wq - lea srcq, [srcq+wq*4] + lea srcq, [srcq+wq*2] neg wq pcmpeqb m7, m7 psrlw m7, 8 ; (word) { 0x00ff } x4 mova m6, [rgb_UVrnd] .loop: ; FIXME check alignment and use mova - movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] - movu m4, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] + movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] + movu m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7] pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3] pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3] @@ -406,25 +407,22 @@ cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w pmaddwd m4, coeffU2 ; (dword) { Gx*GU }[4-7] paddd m3, m6 ; += rgb_UVrnd paddd m5, m6 ; += rgb_UVrnd - psrad m0, 15 + psrad m0, 9 paddd m1, m3 ; (dword) { V[4-7] } paddd m4, m5 ; (dword) { U[4-7] } - psrad m2, 15 - psrad m4, 15 - psrad m1, 15 + psrad m2, 9 + psrad m4, 9 + psrad m1, 9 packssdw m0, m4 ; (word) { U[0-7] } packssdw m2, m1 ; (word) { V[0-7] } %if mmsize == 8 - packuswb m0, m0 ; (byte) { U[0-7] } - packuswb m2, m2 ; (byte) { V[0-7] } - movh [dstUq+wq], m0 - movh [dstVq+wq], m2 + mova [dstUq+wq], m0 + mova [dstVq+wq], m2 %else ; mmsize == 16 - packuswb m0, m2 ; (byte) { U[0-7], V[0-7] } - movh [dstUq+wq], m0 - movhps [dstVq+wq], m0 + mova [dstUq+wq], m0 + mova [dstVq+wq], m2 %endif ; mmsize == 8/16 - add wq, mmsize / 2 + add wq, mmsize jl .loop REP_RET %endif ; ARCH_X86_64 && %0 == 3 @@ -452,8 +450,10 @@ RGB32_FUNCS 0, 0 INIT_XMM sse2 RGB32_FUNCS 8, 12 +%if HAVE_AVX INIT_XMM avx RGB32_FUNCS 8, 12 +%endif ;----------------------------------------------------------------------------- ; YUYV/UYVY/NV12/NV21 packed pixel shuffling. @@ -490,7 +490,7 @@ RGB32_FUNCS 8, 12 ; will be the same (i.e. YUYV+AVX), and thus we don't need to ; split the loop in an aligned and unaligned case %macro YUYV_TO_Y_FN 2-3 -cglobal %2ToY, 3, 3, %1, dst, src, w +cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w %if ARCH_X86_64 movsxd wq, wd %endif @@ -560,11 +560,11 @@ cglobal %2ToY, 3, 3, %1, dst, src, w ; will be the same (i.e. UYVY+AVX), and thus we don't need to ; split the loop in an aligned and unaligned case %macro YUYV_TO_UV_FN 2-3 -cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w +cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w %if ARCH_X86_64 - movsxd wq, dword r4m + movsxd wq, dword r5m %else ; x86-32 - mov wq, r4m + mov wq, r5m %endif add dstUq, wq add dstVq, wq @@ -594,8 +594,8 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w .loop_%1: mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... } mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... } - pand m2, m0, m4 ; (word) { U0, U1, ..., U7 } - pand m3, m1, m4 ; (word) { U8, U9, ..., U15 } + pand m2, m0, m5 ; (word) { U0, U1, ..., U7 } + pand m3, m1, m5 ; (word) { U8, U9, ..., U15 } psrlw m0, 8 ; (word) { V0, V1, ..., V7 } psrlw m1, 8 ; (word) { V8, V9, ..., V15 } packuswb m2, m3 ; (byte) { U0, ..., U15 } @@ -615,11 +615,11 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w ; %1 = nr. of XMM registers ; %2 = nv12 or nv21 %macro NVXX_TO_UV_FN 2 -cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w +cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w %if ARCH_X86_64 - movsxd wq, dword r4m + movsxd wq, dword r5m %else ; x86-32 - mov wq, r4m + mov wq, r5m %endif add dstUq, wq add dstVq, wq @@ -627,8 +627,8 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w test srcq, 15 %endif lea srcq, [srcq+wq*2] - pcmpeqb m4, m4 ; (byte) { 0xff } x 16 - psrlw m4, 8 ; (word) { 0x00ff } x 8 + pcmpeqb m5, m5 ; (byte) { 0xff } x 16 + psrlw m5, 8 ; (word) { 0x00ff } x 8 %if mmsize == 16 jnz .loop_u_start neg wq @@ -660,6 +660,7 @@ YUYV_TO_UV_FN 3, uyvy NVXX_TO_UV_FN 5, nv12 NVXX_TO_UV_FN 5, nv21 +%if HAVE_AVX INIT_XMM avx ; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but ; that's not faster in practice @@ -667,3 +668,4 @@ YUYV_TO_UV_FN 3, yuyv YUYV_TO_UV_FN 3, uyvy, 1 NVXX_TO_UV_FN 5, nv12 NVXX_TO_UV_FN 5, nv21 +%endif diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index c4daa82..11e89a4 100644 --- a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -264,10 +264,12 @@ yuv2planeX_fn 9, 7, 5 yuv2planeX_fn 10, 7, 5 yuv2planeX_fn 16, 8, 5 +%if HAVE_AVX INIT_XMM avx yuv2planeX_fn 8, 10, 7 yuv2planeX_fn 9, 7, 5 yuv2planeX_fn 10, 7, 5 +%endif ; %1=outout-bpc, %2=alignment (u/a) %macro yuv2plane1_mainloop 2 @@ -402,8 +404,10 @@ yuv2plane1_fn 16, 6, 3 INIT_XMM sse4 yuv2plane1_fn 16, 5, 3 +%if HAVE_AVX INIT_XMM avx yuv2plane1_fn 8, 5, 5 yuv2plane1_fn 9, 5, 3 yuv2plane1_fn 10, 5, 3 yuv2plane1_fn 16, 5, 3 +%endif diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index 282618c..9359f0b 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -68,6 +68,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; +DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL; +DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL; +DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; #define RGB2YUV_SHIFT 8 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index c255610..3bca43c 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -7,20 +7,20 @@ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -801,27 +801,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s } } -/* - I use less accurate approximation here by simply left-shifting the input - value and filling the low order bits with zeroes. This method improves PNG - compression but this scheme cannot reproduce white exactly, since it does - not generate an all-ones maximum value; the net effect is to darken the - image slightly. - - The better method should be "left bit replication": - - 4 3 2 1 0 - --------- - 1 1 0 1 1 - - 7 6 5 4 3 2 1 0 - ---------------- - 1 1 0 1 1 1 1 0 - |=======| |===| - | leftmost bits repeated to fill open bits - | - original bits -*/ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; @@ -840,9 +819,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %6, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -870,9 +850,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %6, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -892,7 +873,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null),"m"(mul15_mid),"m"(mul15_hi) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -919,9 +900,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); } } @@ -943,9 +924,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %8, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -973,9 +956,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %8, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -994,7 +979,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm4, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -1021,9 +1006,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); } } @@ -1066,12 +1051,13 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw %5, %%mm1 \n\t" + "pmulhw %6, %%mm2 \n\t" PACK_RGB32 :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid),"m"(mul15_hi) :"memory"); d += 16; s += 4; @@ -1081,9 +1067,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); *d++ = 255; } } @@ -1108,12 +1094,14 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw %7, %%mm1 \n\t" + "pmulhw %6, %%mm2 \n\t" PACK_RGB32 :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid) :"memory"); d += 16; s += 4; @@ -1123,9 +1111,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); *d++ = 255; } } diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm index 0d367f7..3c8b6fa 100644 --- a/libswscale/x86/scale.asm +++ b/libswscale/x86/scale.asm @@ -411,11 +411,13 @@ SCALE_FUNC %1, %2, X, X8, %3, 7, %4 SCALE_FUNCS 8, 15, %1, %2 SCALE_FUNCS 9, 15, %1, %3 SCALE_FUNCS 10, 15, %1, %3 +SCALE_FUNCS 14, 15, %1, %3 SCALE_FUNCS 16, 15, %1, %4 %endif ; !sse4 SCALE_FUNCS 8, 19, %1, %2 SCALE_FUNCS 9, 19, %1, %3 SCALE_FUNCS 10, 19, %1, %3 +SCALE_FUNCS 14, 19, %1, %3 SCALE_FUNCS 16, 19, %1, %4 %endmacro diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index c112cb8..10a1542 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -66,6 +66,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; + //MMX versions #if HAVE_MMX #undef RENAME @@ -90,6 +91,7 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI const int flags= c->flags; int16_t **lumPixBuf= c->lumPixBuf; int16_t **chrUPixBuf= c->chrUPixBuf; + int16_t **alpPixBuf= c->alpPixBuf; const int vLumBufSize= c->vLumBufSize; const int vChrBufSize= c->vChrBufSize; int16_t *vLumFilterPos= c->vLumFilterPos; @@ -98,6 +100,7 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI int16_t *vChrFilter= c->vChrFilter; int32_t *lumMmxFilter= c->lumMmxFilter; int32_t *chrMmxFilter= c->chrMmxFilter; + int32_t av_unused *alpMmxFilter= c->alpMmxFilter; const int vLumFilterSize= c->vLumFilterSize; const int vChrFilterSize= c->vChrFilterSize; const int chrDstY= dstY>>c->chrDstVSubSample; @@ -111,8 +114,9 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI c->greenDither= ff_dither4[dstY&1]; c->redDither= ff_dither8[(dstY+1)&1]; if (dstY < dstH - 2) { - const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; int i; if (flags & SWS_ACCURATE_RND) { int s= APCK_SIZE / 8; @@ -122,6 +126,12 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI lumMmxFilter[s*i+APCK_COEF/4 ]= lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); + if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { + *(const void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; + *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; + alpMmxFilter[s*i+APCK_COEF/4 ]= + alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; + } } for (i=0; i<vChrFilterSize; i+=2) { *(const void**)&chrMmxFilter[s*i ]= chrUSrcPtr[i ]; @@ -136,6 +146,11 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI lumMmxFilter[4*i+2]= lumMmxFilter[4*i+3]= ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; + if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { + *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i]; + alpMmxFilter[4*i+2]= + alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2]; + } } for (i=0; i<vChrFilterSize; i++) { *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i]; @@ -147,6 +162,67 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI } } +#if HAVE_MMX2 +static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, + const int16_t **src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset) +{ + if(((int)dest) & 15){ + return yuv2yuvX_MMX2(filter, filterSize, src, dest, dstW, dither, offset); + } + if (offset) { + __asm__ volatile("movq (%0), %%xmm3\n\t" + "movdqa %%xmm3, %%xmm4\n\t" + "psrlq $24, %%xmm3\n\t" + "psllq $40, %%xmm4\n\t" + "por %%xmm4, %%xmm3\n\t" + :: "r"(dither) + ); + } else { + __asm__ volatile("movq (%0), %%xmm3\n\t" + :: "r"(dither) + ); + } + __asm__ volatile( + "pxor %%xmm0, %%xmm0\n\t" + "punpcklbw %%xmm0, %%xmm3\n\t" + "psraw $4, %%xmm3\n\t" + "movdqa %%xmm3, %%xmm4\n\t" + "movdqa %%xmm3, %%xmm7\n\t" + "movl %3, %%ecx\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + ".p2align 4 \n\t" /* FIXME Unroll? */\ + "1: \n\t"\ + "movddup 8(%%"REG_d"), %%xmm0 \n\t" /* filterCoeff */\ + "movdqa (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\ + "movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\ + "add $16, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "pmulhw %%xmm0, %%xmm2 \n\t"\ + "pmulhw %%xmm0, %%xmm5 \n\t"\ + "paddw %%xmm2, %%xmm3 \n\t"\ + "paddw %%xmm5, %%xmm4 \n\t"\ + " jnz 1b \n\t"\ + "psraw $3, %%xmm3 \n\t"\ + "psraw $3, %%xmm4 \n\t"\ + "packuswb %%xmm4, %%xmm3 \n\t" + "movntdq %%xmm3, (%1, %%"REG_c")\n\t" + "add $16, %%"REG_c" \n\t"\ + "cmp %2, %%"REG_c" \n\t"\ + "movdqa %%xmm7, %%xmm3\n\t" + "movdqa %%xmm7, %%xmm4\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "jb 1b \n\t"\ + :: "g" (filter), + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) + : "%"REG_d, "%"REG_S, "%"REG_c + ); +} +#endif + #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \ SwsContext *c, int16_t *data, \ @@ -158,10 +234,12 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( SCALE_FUNC(filter_n, 8, 15, opt); \ SCALE_FUNC(filter_n, 9, 15, opt); \ SCALE_FUNC(filter_n, 10, 15, opt); \ + SCALE_FUNC(filter_n, 14, 15, opt); \ SCALE_FUNC(filter_n, 16, 15, opt); \ SCALE_FUNC(filter_n, 8, 19, opt); \ SCALE_FUNC(filter_n, 9, 19, opt); \ SCALE_FUNC(filter_n, 10, 19, opt); \ + SCALE_FUNC(filter_n, 14, 19, opt); \ SCALE_FUNC(filter_n, 16, 19, opt) #define SCALE_FUNCS_MMX(opt) \ @@ -253,6 +331,10 @@ void ff_sws_init_swScale_mmx(SwsContext *c) #if HAVE_MMX2 if (cpu_flags & AV_CPU_FLAG_MMX2) sws_init_swScale_MMX2(c); + if (cpu_flags & AV_CPU_FLAG_SSE3){ + if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) + c->yuv2planeX = yuv2yuvX_sse3; + } #endif #if HAVE_YASM @@ -266,7 +348,10 @@ void ff_sws_init_swScale_mmx(SwsContext *c) } else if (c->srcBpc == 10) { \ hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ ff_hscale10to19_ ## filtersize ## _ ## opt1; \ - } else /* c->srcBpc == 16 */ { \ + } else if (c->srcBpc == 14 || ((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \ + hscalefn = c->dstBpc <= 10 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \ + ff_hscale14to19_ ## filtersize ## _ ## opt1; \ + } else { /* c->srcBpc == 16 */ \ hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ ff_hscale16to19_ ## filtersize ## _ ## opt1; \ } \ @@ -282,7 +367,7 @@ switch(c->dstBpc){ \ case 16: do_16_case; break; \ case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \ case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \ - default: vscalefn = ff_yuv2planeX_8_ ## opt; break; \ + default: /*vscalefn = ff_yuv2planeX_8_ ## opt;*/ break; \ } #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ switch(c->dstBpc){ \ @@ -401,7 +486,7 @@ switch(c->dstBpc){ \ c->yuv2plane1 = ff_yuv2plane1_16_sse4; } - if (cpu_flags & AV_CPU_FLAG_AVX) { + if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) { ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx,); ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 6fc03e4..b179184 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1,25 +1,26 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #undef REAL_MOVNTQ #undef MOVNTQ +#undef MOVNTQ2 #undef PREFETCH #if COMPILE_TEMPLATE_MMX2 @@ -30,11 +31,84 @@ #if COMPILE_TEMPLATE_MMX2 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" +#define MOVNTQ2 "movntq " #else #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" +#define MOVNTQ2 "movq " #endif #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) +#if !COMPILE_TEMPLATE_MMX2 +static av_always_inline void +dither_8to16(const uint8_t *srcDither, int rot) +{ + if (rot) { + __asm__ volatile("pxor %%mm0, %%mm0\n\t" + "movq (%0), %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "psrlq $24, %%mm3\n\t" + "psllq $40, %%mm4\n\t" + "por %%mm4, %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "punpcklbw %%mm0, %%mm3\n\t" + "punpckhbw %%mm0, %%mm4\n\t" + :: "r"(srcDither) + ); + } else { + __asm__ volatile("pxor %%mm0, %%mm0\n\t" + "movq (%0), %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "punpcklbw %%mm0, %%mm3\n\t" + "punpckhbw %%mm0, %%mm4\n\t" + :: "r"(srcDither) + ); + } +} +#endif + +static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, + const int16_t **src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset) +{ + dither_8to16(dither, offset); + __asm__ volatile(\ + "psraw $4, %%mm3\n\t" + "psraw $4, %%mm4\n\t" + "movq %%mm3, %%mm6\n\t" + "movq %%mm4, %%mm7\n\t" + "movl %3, %%ecx\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + ".p2align 4 \n\t" /* FIXME Unroll? */\ + "1: \n\t"\ + "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ + "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\ + "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\ + "add $16, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "pmulhw %%mm0, %%mm2 \n\t"\ + "pmulhw %%mm0, %%mm5 \n\t"\ + "paddw %%mm2, %%mm3 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + " jnz 1b \n\t"\ + "psraw $3, %%mm3 \n\t"\ + "psraw $3, %%mm4 \n\t"\ + "packuswb %%mm4, %%mm3 \n\t" + MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t" + "add $8, %%"REG_c" \n\t"\ + "cmp %2, %%"REG_c" \n\t"\ + "movq %%mm6, %%mm3\n\t" + "movq %%mm7, %%mm4\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "jb 1b \n\t"\ + :: "g" (filter), + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) + : "%"REG_d, "%"REG_S, "%"REG_c + ); +} + #define YSCALEYUV2PACKEDX_UV \ __asm__ volatile(\ "xor %%"REG_a", %%"REG_a" \n\t"\ @@ -260,7 +334,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { YSCALEYUV2PACKEDX_ACCURATE @@ -293,7 +367,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { YSCALEYUV2PACKEDX @@ -350,7 +424,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -374,7 +448,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -427,7 +501,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -451,7 +525,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -584,7 +658,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -608,7 +682,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -649,7 +723,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -670,7 +744,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -786,8 +860,8 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], : "%r8" ); #else - *(const uint16_t **)(&c->u_temp)=abuf0; - *(const uint16_t **)(&c->v_temp)=abuf1; + c->u_temp=(intptr_t)abuf0; + c->v_temp=(intptr_t)abuf1; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1559,9 +1633,9 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) { enum PixelFormat dstFormat = c->dstFormat; - if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && - dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21) { - if (!(c->flags & SWS_BITEXACT)) { + c->use_mmx_vfilter= 0; + if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12 + && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) { if (c->flags & SWS_ACCURATE_RND) { if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { @@ -1574,6 +1648,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } } else { + c->use_mmx_vfilter= 1; + c->yuv2planeX = RENAME(yuv2yuvX ); if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break; @@ -1585,7 +1661,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } } - } if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case PIX_FMT_RGB32: diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c index 0eaea77..fd184eb 100644 --- a/libswscale/x86/yuv2rgb_mmx.c +++ b/libswscale/x86/yuv2rgb_mmx.c @@ -7,20 +7,20 @@ * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -68,10 +68,6 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) { int cpu_flags = av_get_cpu_flags(); - if (c->srcFormat != PIX_FMT_YUV420P && - c->srcFormat != PIX_FMT_YUVA420P) - return NULL; - #if HAVE_MMX2 if (cpu_flags & AV_CPU_FLAG_MMX2) { switch (c->dstFormat) { diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index 5d1fa5b..624de14 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -4,20 +4,20 @@ * Copyright (C) 2001-2007 Michael Niedermayer * (c) 2010 Konstantin Shishkov * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -43,17 +43,14 @@ if (h_size * depth > FFABS(dstStride[0])) \ h_size -= 8; \ \ - if (c->srcFormat == PIX_FMT_YUV422P) { \ - srcStride[1] *= 2; \ - srcStride[2] *= 2; \ - } \ + vshift = c->srcFormat != PIX_FMT_YUV422P; \ \ __asm__ volatile ("pxor %mm4, %mm4\n\t"); \ for (y = 0; y < srcSliceH; y++) { \ uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ const uint8_t *py = src[0] + y * srcStride[0]; \ - const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \ - const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \ + const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ + const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ #define YUV2RGB_INITIAL_LOAD \ @@ -141,6 +138,7 @@ : "+r" (index), "+r" (image) \ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ "r" (py - 2*index) \ + : "memory" \ ); \ } \ @@ -148,6 +146,7 @@ : "+r" (index), "+r" (image) \ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ "r" (py - 2*index), "r" (pa - 2*index) \ + : "memory" \ ); \ } \ @@ -188,7 +187,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -216,7 +215,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -306,7 +305,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -324,7 +323,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -368,7 +367,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -389,7 +388,7 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -411,7 +410,7 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -432,7 +431,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c index 24dc960..84e3a17 100644 --- a/libswscale/yuv2rgb.c +++ b/libswscale/yuv2rgb.c @@ -6,20 +6,20 @@ * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -34,6 +34,7 @@ #include "swscale_internal.h" #include "libavutil/cpu.h" #include "libavutil/bswap.h" +#include "libavutil/pixdesc.h" extern const uint8_t dither_4x4_16[4][8]; extern const uint8_t dither_8x8_32[8][8]; @@ -61,9 +62,9 @@ const int *sws_getCoefficients(int colorspace) #define LOADCHROMA(i) \ U = pu[i]; \ V = pv[i]; \ - r = (void *)c->table_rV[V]; \ - g = (void *)(c->table_gU[U] + c->table_gV[V]); \ - b = (void *)c->table_bU[U]; + r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \ + g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \ + b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM]; #define PUTRGB(dst,src,i) \ Y = src[2*i]; \ @@ -478,7 +479,7 @@ CLOSEYUV2RGBFUNC(8) YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0) const uint8_t *d128 = dither_8x8_220[y&7]; char out_1 = 0, out_2 = 0; - g= c->table_gU[128] + c->table_gV[128]; + g= c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; #define PUTRGB1(out,src,i,o) \ Y = src[2*i]; \ @@ -519,7 +520,8 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) if (t) return t; - av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", sws_format_name(c->srcFormat), sws_format_name(c->dstFormat)); + av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", + av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); switch (c->dstFormat) { case PIX_FMT_BGR48BE: @@ -527,9 +529,9 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) case PIX_FMT_RGB48BE: case PIX_FMT_RGB48LE: return yuv2rgb_c_48; case PIX_FMT_ARGB: - case PIX_FMT_ABGR: if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) return yuva2argb_c; + case PIX_FMT_ABGR: if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) return yuva2argb_c; case PIX_FMT_RGBA: - case PIX_FMT_BGRA: return (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) ? yuva2rgba_c : yuv2rgb_c_32; + case PIX_FMT_BGRA: return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32; case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb; case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr; case PIX_FMT_RGB565: @@ -551,29 +553,27 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) return NULL; } -static void fill_table(uint8_t* table[256], const int elemsize, const int inc, void *y_tab) +static void fill_table(uint8_t* table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int inc, void *y_tab) { int i; - int64_t cb = 0; uint8_t *y_table = y_tab; y_table -= elemsize * (inc >> 9); - for (i = 0; i < 256; i++) { + for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) { + int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc; table[i] = y_table + elemsize * (cb >> 16); - cb += inc; } } -static void fill_gv_table(int table[256], const int elemsize, const int inc) +static void fill_gv_table(int table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int inc) { int i; - int64_t cb = 0; int off = -(inc >> 9); - for (i = 0; i < 256; i++) { + for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) { + int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc; table[i] = elemsize * (off + (cb >> 16)); - cb += inc; } } @@ -611,7 +611,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int uint8_t *y_table; uint16_t *y_table16; uint32_t *y_table32; - int i, base, rbase, gbase, bbase, abase, needAlpha; + int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha; const int yoffs = fullRange ? 384 : 326; int64_t crv = inv_table[0]; |