diff options
Diffstat (limited to 'libswscale/swscale.c')
-rw-r--r-- | libswscale/swscale.c | 150 |
1 files changed, 78 insertions, 72 deletions
diff --git a/libswscale/swscale.c b/libswscale/swscale.c index b231302..0578e2d 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -27,6 +27,7 @@ #include "swscale.h" #include "swscale_internal.h" #include "rgb2rgb.h" +#include "libavutil/avassert.h" #include "libavutil/intreadwrite.h" #include "libavutil/cpu.h" #include "libavutil/avutil.h" @@ -47,6 +48,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = { DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = { 64, 64, 64, 64, 64, 64, 64, 64 }; + static av_always_inline void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val) @@ -69,6 +71,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; int sh = bits - 4; + if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15) + sh= 9; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -90,6 +95,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t const uint16_t *src = (const uint16_t *) _src; int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + if(sh<15) + sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -204,7 +212,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width) int i; int32_t *dst = (int32_t *) _dst; for (i = 0; i < width; i++) - dst[i] = (dst[i]*14071 + (33561947<<4))>>14; + dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12; } static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, @@ -218,6 +226,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; xpos+=xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) + dst[i] = src[srcW-1]*128; } // *** horizontal scale Y line to temp buffer @@ -228,12 +238,12 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, uint8_t *formatConvBuffer, uint32_t *pal, int isAlpha) { - void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; + void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; const uint8_t *src = src_in[isAlpha ? 3 : 0]; if (toYV12) { - toYV12(formatConvBuffer, src, srcW, pal); + toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); src= formatConvBuffer; } else if (c->readLumPlanar && !isAlpha) { c->readLumPlanar(formatConvBuffer, src_in, srcW); @@ -263,6 +273,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); xpos+=xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { + dst1[i] = src1[srcW-1]*128; + dst2[i] = src2[srcW-1]*128; + } } static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, @@ -273,12 +287,12 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 { const uint8_t *src1 = src_in[1], *src2 = src_in[2]; if (c->chrToYV12) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); - c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); + c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal); src1= formatConvBuffer; src2= buf2; } else if (c->readChrPlanar) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW); src1= formatConvBuffer; src2= buf2; @@ -316,8 +330,6 @@ static int swScale(SwsContext *c, const uint8_t* src[], int16_t *vChrFilterPos= c->vChrFilterPos; int16_t *hLumFilterPos= c->hLumFilterPos; int16_t *hChrFilterPos= c->hChrFilterPos; - int16_t *vLumFilter= c->vLumFilter; - int16_t *vChrFilter= c->vChrFilter; int16_t *hLumFilter= c->hLumFilter; int16_t *hChrFilter= c->hChrFilter; int32_t *lumMmxFilter= c->lumMmxFilter; @@ -337,13 +349,14 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); int lastDstY; uint32_t *pal=c->pal_yuv; + int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat); + yuv2planar1_fn yuv2plane1 = c->yuv2plane1; yuv2planarX_fn yuv2planeX = c->yuv2planeX; yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX; yuv2packed1_fn yuv2packed1 = c->yuv2packed1; yuv2packed2_fn yuv2packed2 = c->yuv2packed2; yuv2packedX_fn yuv2packedX = c->yuv2packedX; - int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); /* vars which will change and which we need to store back in the context */ int dstY= c->dstY; @@ -373,7 +386,7 @@ static int swScale(SwsContext *c, const uint8_t* src[], DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); - if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) { + if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) { static int warnedAlready=0; //FIXME move this into the context perhaps if (flags & SWS_PRINT_INFO && !warnedAlready) { av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" @@ -382,6 +395,18 @@ static int swScale(SwsContext *c, const uint8_t* src[], } } + if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16 + || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16 + || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16 + ) { + static int warnedAlready=0; + int cpu_flags = av_get_cpu_flags(); + if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){ + av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n"); + warnedAlready=1; + } + } + /* Note the user might start scaling the picture in the middle so this will not get executed. This is not really intended but works currently, so people might do it. */ @@ -406,15 +431,14 @@ static int swScale(SwsContext *c, const uint8_t* src[], dst[2] + dstStride[2] * chrDstY, (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL, }; + int use_mmx_vfilter= c->use_mmx_vfilter; const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input - - // Last line needed as input - int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1; - int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1; - int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1; + int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input + int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input + int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input int enough_lines; //handle holes (FAST_BILINEAR & weird filters) @@ -503,85 +527,66 @@ static int swScale(SwsContext *c, const uint8_t* src[], // hmm looks like we can't use MMX here without overwriting this array's tail ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX, &yuv2packed1, &yuv2packed2, &yuv2packedX); + use_mmx_vfilter= 0; } { - const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; - - if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { - const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize; - int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); - for (i = 0; i < neg; i++) - tmpY[i] = lumSrcPtr[neg]; - for ( ; i < end; i++) - tmpY[i] = lumSrcPtr[i]; - for ( ; i < vLumFilterSize; i++) - tmpY[i] = tmpY[i-1]; - lumSrcPtr = tmpY; - - if (alpSrcPtr) { - const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize; - for (i = 0; i < neg; i++) - tmpA[i] = alpSrcPtr[neg]; - for ( ; i < end; i++) - tmpA[i] = alpSrcPtr[i]; - for ( ; i < vLumFilterSize; i++) - tmpA[i] = tmpA[i - 1]; - alpSrcPtr = tmpA; - } - } - if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) { - const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize, - **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize; - int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); - for (i = 0; i < neg; i++) { - tmpU[i] = chrUSrcPtr[neg]; - tmpV[i] = chrVSrcPtr[neg]; - } - for ( ; i < end; i++) { - tmpU[i] = chrUSrcPtr[i]; - tmpV[i] = chrVSrcPtr[i]; - } - for ( ; i < vChrFilterSize; i++) { - tmpU[i] = tmpU[i - 1]; - tmpV[i] = tmpV[i - 1]; - } - chrUSrcPtr = tmpU; - chrVSrcPtr = tmpV; - } + const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + int16_t *vLumFilter= c->vLumFilter; + int16_t *vChrFilter= c->vChrFilter; if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { //YV12 like const int chrSkipMask= (1<<c->chrDstVSubSample)-1; + vLumFilter += dstY * vLumFilterSize; + vChrFilter += chrDstY * vChrFilterSize; + +// av_assert0(use_mmx_vfilter != ( +// yuv2planeX == yuv2planeX_10BE_c +// || yuv2planeX == yuv2planeX_10LE_c +// || yuv2planeX == yuv2planeX_9BE_c +// || yuv2planeX == yuv2planeX_9LE_c +// || yuv2planeX == yuv2planeX_16BE_c +// || yuv2planeX == yuv2planeX_16LE_c +// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86); + + if(use_mmx_vfilter){ + vLumFilter= c->lumMmxFilter; + vChrFilter= c->chrMmxFilter; + } + if (vLumFilterSize == 1) { yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, + yuv2planeX(vLumFilter, vLumFilterSize, lumSrcPtr, dest[0], dstW, c->lumDither8, 0); } if (!((dstY&chrSkipMask) || isGray(dstFormat))) { if (yuv2nv12cX) { - yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); + yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); } else if (vChrFilterSize == 1) { yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0); yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3); } else { - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, + yuv2planeX(vChrFilter, vChrFilterSize, chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0); - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, - chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3); + yuv2planeX(vChrFilter, vChrFilterSize, + chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3); } } if (CONFIG_SWSCALE_ALPHA && alpPixBuf){ + if(use_mmx_vfilter){ + vLumFilter= c->alpMmxFilter; + } if (vLumFilterSize == 1) { yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, + yuv2planeX(vLumFilter, vLumFilterSize, alpSrcPtr, dest[3], dstW, c->lumDither8, 0); } } @@ -643,6 +648,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) ff_sws_init_input_funcs(c); + if (c->srcBpc == 8) { if (c->dstBpc <= 10) { c->hyScale = c->hcScale = hScale8To15_c; |