diff options
Diffstat (limited to 'libswscale/swscale.c')
-rw-r--r-- | libswscale/swscale.c | 157 |
1 files changed, 79 insertions, 78 deletions
diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 94f51cf..7cc630e 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -24,6 +24,7 @@ #include <stdio.h> #include <string.h> +#include "libavutil/avassert.h" #include "libavutil/avutil.h" #include "libavutil/bswap.h" #include "libavutil/cpu.h" @@ -71,6 +72,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; int sh = bits - 4; + if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15) + sh= 9; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -92,6 +96,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint16_t *src = (const uint16_t *) _src; int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + if(sh<15) + sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -208,7 +215,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width) int i; int32_t *dst = (int32_t *) _dst; for (i = 0; i < width; i++) - dst[i] = (dst[i] * 14071 + (33561947 << 4)) >> 14; + dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12; } static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, @@ -222,6 +229,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha; xpos += xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) + dst[i] = src[srcW-1]*128; } // *** horizontal scale Y line to temp buffer @@ -234,13 +243,13 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, uint8_t *formatConvBuffer, uint32_t *pal, int isAlpha) { - void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = + void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; const uint8_t *src = src_in[isAlpha ? 3 : 0]; if (toYV12) { - toYV12(formatConvBuffer, src, srcW, pal); + toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); src = formatConvBuffer; } else if (c->readLumPlanar && !isAlpha) { c->readLumPlanar(formatConvBuffer, src_in, srcW); @@ -271,6 +280,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha); xpos += xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { + dst1[i] = src1[srcW-1]*128; + dst2[i] = src2[srcW-1]*128; + } } static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, @@ -285,13 +298,13 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, const uint8_t *src1 = src_in[1], *src2 = src_in[2]; if (c->chrToYV12) { uint8_t *buf2 = formatConvBuffer + - FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); - c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); - src1 = formatConvBuffer; - src2 = buf2; + FFALIGN(srcW*2+78, 16); + c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal); + src1= formatConvBuffer; + src2= buf2; } else if (c->readChrPlanar) { uint8_t *buf2 = formatConvBuffer + - FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); + FFALIGN(srcW*2+78, 16); c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW); src1 = formatConvBuffer; src2 = buf2; @@ -332,8 +345,6 @@ static int swScale(SwsContext *c, const uint8_t *src[], int32_t *vChrFilterPos = c->vChrFilterPos; int32_t *hLumFilterPos = c->hLumFilterPos; int32_t *hChrFilterPos = c->hChrFilterPos; - int16_t *vLumFilter = c->vLumFilter; - int16_t *vChrFilter = c->vChrFilter; int16_t *hLumFilter = c->hLumFilter; int16_t *hChrFilter = c->hChrFilter; int32_t *lumMmxFilter = c->lumMmxFilter; @@ -392,8 +403,8 @@ static int swScale(SwsContext *c, const uint8_t *src[], DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); - if (dstStride[0] % 8 != 0 || dstStride[1] % 8 != 0 || - dstStride[2] % 8 != 0 || dstStride[3] % 8 != 0) { + if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || + dstStride[2]%16 !=0 || dstStride[3]%16 != 0) { static int warnedAlready = 0; // FIXME maybe move this into the context if (flags & SWS_PRINT_INFO && !warnedAlready) { av_log(c, AV_LOG_WARNING, @@ -403,6 +414,18 @@ static int swScale(SwsContext *c, const uint8_t *src[], } } + if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16 + || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16 + || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16 + ) { + static int warnedAlready=0; + int cpu_flags = av_get_cpu_flags(); + if (HAVE_MMXEXT && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){ + av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n"); + warnedAlready=1; + } + } + /* Note the user might start scaling the picture in the middle so this * will not get executed. This is not really intended but works * currently, so people might do it. */ @@ -427,6 +450,7 @@ static int swScale(SwsContext *c, const uint8_t *src[], dst[2] + dstStride[2] * chrDstY, (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL, }; + int use_mmx_vfilter= c->use_mmx_vfilter; // First line needed as input const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]); @@ -531,98 +555,74 @@ static int swScale(SwsContext *c, const uint8_t *src[], * this array's tail */ ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX, &yuv2packed1, &yuv2packed2, &yuv2packedX); + use_mmx_vfilter= 0; } { - const int16_t **lumSrcPtr = (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrUSrcPtr = (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **chrVSrcPtr = (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? - (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; - - if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { - const int16_t **tmpY = (const int16_t **)lumPixBuf + - 2 * vLumBufSize; - int neg = -firstLumSrcY, i; - int end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); - for (i = 0; i < neg; i++) - tmpY[i] = lumSrcPtr[neg]; - for (; i < end; i++) - tmpY[i] = lumSrcPtr[i]; - for (; i < vLumFilterSize; i++) - tmpY[i] = tmpY[i - 1]; - lumSrcPtr = tmpY; - - if (alpSrcPtr) { - const int16_t **tmpA = (const int16_t **)alpPixBuf + - 2 * vLumBufSize; - for (i = 0; i < neg; i++) - tmpA[i] = alpSrcPtr[neg]; - for (; i < end; i++) - tmpA[i] = alpSrcPtr[i]; - for (; i < vLumFilterSize; i++) - tmpA[i] = tmpA[i - 1]; - alpSrcPtr = tmpA; - } - } - if (firstChrSrcY < 0 || - firstChrSrcY + vChrFilterSize > c->chrSrcH) { - const int16_t **tmpU = (const int16_t **)chrUPixBuf + 2 * vChrBufSize, - **tmpV = (const int16_t **)chrVPixBuf + 2 * vChrBufSize; - int neg = -firstChrSrcY, i; - int end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); - for (i = 0; i < neg; i++) { - tmpU[i] = chrUSrcPtr[neg]; - tmpV[i] = chrVSrcPtr[neg]; - } - for (; i < end; i++) { - tmpU[i] = chrUSrcPtr[i]; - tmpV[i] = chrVSrcPtr[i]; - } - for (; i < vChrFilterSize; i++) { - tmpU[i] = tmpU[i - 1]; - tmpV[i] = tmpV[i - 1]; - } - chrUSrcPtr = tmpU; - chrVSrcPtr = tmpV; - } + (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + int16_t *vLumFilter = c->vLumFilter; + int16_t *vChrFilter = c->vChrFilter; if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like const int chrSkipMask = (1 << c->chrDstVSubSample) - 1; + vLumFilter += dstY * vLumFilterSize; + vChrFilter += chrDstY * vChrFilterSize; + +// av_assert0(use_mmx_vfilter != ( +// yuv2planeX == yuv2planeX_10BE_c +// || yuv2planeX == yuv2planeX_10LE_c +// || yuv2planeX == yuv2planeX_9BE_c +// || yuv2planeX == yuv2planeX_9LE_c +// || yuv2planeX == yuv2planeX_16BE_c +// || yuv2planeX == yuv2planeX_16LE_c +// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86); + + if(use_mmx_vfilter){ + vLumFilter= c->lumMmxFilter; + vChrFilter= c->chrMmxFilter; + } + if (vLumFilterSize == 1) { yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, - vLumFilterSize, lumSrcPtr, dest[0], + yuv2planeX(vLumFilter, vLumFilterSize, + lumSrcPtr, dest[0], dstW, c->lumDither8, 0); } if (!((dstY & chrSkipMask) || isGray(dstFormat))) { if (yuv2nv12cX) { - yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, + yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); } else if (vChrFilterSize == 1) { yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0); yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3); } else { - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, + yuv2planeX(vChrFilter, vChrFilterSize, chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0); - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, + yuv2planeX(vChrFilter, vChrFilterSize, chrVSrcPtr, dest[2], - chrDstW, c->chrDither8, 3); + chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3); } } if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { + if(use_mmx_vfilter){ + vLumFilter= c->alpMmxFilter; + } if (vLumFilterSize == 1) { yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, + yuv2planeX(vLumFilter, vLumFilterSize, alpSrcPtr, dest[3], dstW, c->lumDither8, 0); } @@ -687,8 +687,9 @@ static av_cold void sws_init_swScale_c(SwsContext *c) ff_sws_init_input_funcs(c); + if (c->srcBpc == 8) { - if (c->dstBpc <= 10) { + if (c->dstBpc <= 14) { c->hyScale = c->hcScale = hScale8To15_c; if (c->flags & SWS_FAST_BILINEAR) { c->hyscale_fast = hyscale_fast_c; @@ -698,12 +699,12 @@ static av_cold void sws_init_swScale_c(SwsContext *c) c->hyScale = c->hcScale = hScale8To19_c; } } else { - c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c + c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c : hScale16To15_c; } if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { - if (c->dstBpc <= 10) { + if (c->dstBpc <= 14) { if (c->srcRange) { c->lumConvertRange = lumRangeFromJpeg_c; c->chrConvertRange = chrRangeFromJpeg_c; |