summary | refs | log | tree | commit | diff | stats
path: root/libswscale/swscale.c
diff options
context:
space:
mode:
Diffstat (limited to 'libswscale/swscale.c')
-rw-r--r-- libswscale/swscale.c 150
1 files changed, 78 insertions, 72 deletions
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index b231302..0578e2d 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -27,6 +27,7 @@
#include "swscale.h"
#include "swscale_internal.h"
#include "rgb2rgb.h"
+#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/cpu.h"
#include "libavutil/avutil.h"
@@ -47,6 +48,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{ 64, 64, 64, 64, 64, 64, 64, 64 };
+
static av_always_inline void fillPlane(uint8_t* plane, int stride,
int width, int height,
int y, uint8_t val)
@@ -69,6 +71,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t
int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
int sh = bits - 4;
+ if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
+ sh= 9;
+
for (i = 0; i < dstW; i++) {
int j;
int srcPos = filterPos[i];
@@ -90,6 +95,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t
const uint16_t *src = (const uint16_t *) _src;
int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+ if(sh<15)
+ sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+
for (i = 0; i < dstW; i++) {
int j;
int srcPos = filterPos[i];
@@ -204,7 +212,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
int i;
int32_t *dst = (int32_t *) _dst;
for (i = 0; i < width; i++)
- dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
+ dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
}
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
@@ -218,6 +226,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
xpos+=xInc;
}
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+ dst[i] = src[srcW-1]*128;
}
// *** horizontal scale Y line to temp buffer
@@ -228,12 +238,12 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha)
{
- void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
+ void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
const uint8_t *src = src_in[isAlpha ? 3 : 0];
if (toYV12) {
- toYV12(formatConvBuffer, src, srcW, pal);
+ toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
src= formatConvBuffer;
} else if (c->readLumPlanar && !isAlpha) {
c->readLumPlanar(formatConvBuffer, src_in, srcW);
@@ -263,6 +273,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
xpos+=xInc;
}
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+ dst1[i] = src1[srcW-1]*128;
+ dst2[i] = src2[srcW-1]*128;
+ }
}
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
@@ -273,12 +287,12 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2
{
const uint8_t *src1 = src_in[1], *src2 = src_in[2];
if (c->chrToYV12) {
- uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
- c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
+ uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
+ c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
src1= formatConvBuffer;
src2= buf2;
} else if (c->readChrPlanar) {
- uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
+ uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
src1= formatConvBuffer;
src2= buf2;
@@ -316,8 +330,6 @@ static int swScale(SwsContext *c, const uint8_t* src[],
int16_t *vChrFilterPos= c->vChrFilterPos;
int16_t *hLumFilterPos= c->hLumFilterPos;
int16_t *hChrFilterPos= c->hChrFilterPos;
- int16_t *vLumFilter= c->vLumFilter;
- int16_t *vChrFilter= c->vChrFilter;
int16_t *hLumFilter= c->hLumFilter;
int16_t *hChrFilter= c->hChrFilter;
int32_t *lumMmxFilter= c->lumMmxFilter;
@@ -337,13 +349,14 @@ static int swScale(SwsContext *c, const uint8_t* src[],
const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
int lastDstY;
uint32_t *pal=c->pal_yuv;
+ int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
+
yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
yuv2planarX_fn yuv2planeX = c->yuv2planeX;
yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
yuv2packedX_fn yuv2packedX = c->yuv2packedX;
- int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
/* vars which will change and which we need to store back in the context */
int dstY= c->dstY;
@@ -373,7 +386,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
- if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
+ if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
static int warnedAlready=0; //FIXME move this into the context perhaps
if (flags & SWS_PRINT_INFO && !warnedAlready) {
av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
@@ -382,6 +395,18 @@ static int swScale(SwsContext *c, const uint8_t* src[],
}
}
+ if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
+ || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
+ || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
+ ) {
+ static int warnedAlready=0;
+ int cpu_flags = av_get_cpu_flags();
+ if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
+ av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
+ warnedAlready=1;
+ }
+ }
+
/* Note the user might start scaling the picture in the middle so this
will not get executed. This is not really intended but works
currently, so people might do it. */
@@ -406,15 +431,14 @@ static int swScale(SwsContext *c, const uint8_t* src[],
dst[2] + dstStride[2] * chrDstY,
(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
};
+ int use_mmx_vfilter= c->use_mmx_vfilter;
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
-
- // Last line needed as input
- int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
- int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
- int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
+ int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
+ int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
+ int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
int enough_lines;
//handle holes (FAST_BILINEAR & weird filters)
@@ -503,85 +527,66 @@ static int swScale(SwsContext *c, const uint8_t* src[],
// hmm looks like we can't use MMX here without overwriting this array's tail
ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
&yuv2packed1, &yuv2packed2, &yuv2packedX);
+ use_mmx_vfilter= 0;
}
{
- const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
- const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-
- if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
- const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
- int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
- for (i = 0; i < neg; i++)
- tmpY[i] = lumSrcPtr[neg];
- for ( ; i < end; i++)
- tmpY[i] = lumSrcPtr[i];
- for ( ; i < vLumFilterSize; i++)
- tmpY[i] = tmpY[i-1];
- lumSrcPtr = tmpY;
-
- if (alpSrcPtr) {
- const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
- for (i = 0; i < neg; i++)
- tmpA[i] = alpSrcPtr[neg];
- for ( ; i < end; i++)
- tmpA[i] = alpSrcPtr[i];
- for ( ; i < vLumFilterSize; i++)
- tmpA[i] = tmpA[i - 1];
- alpSrcPtr = tmpA;
- }
- }
- if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
- const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
- **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
- int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
- for (i = 0; i < neg; i++) {
- tmpU[i] = chrUSrcPtr[neg];
- tmpV[i] = chrVSrcPtr[neg];
- }
- for ( ; i < end; i++) {
- tmpU[i] = chrUSrcPtr[i];
- tmpV[i] = chrVSrcPtr[i];
- }
- for ( ; i < vChrFilterSize; i++) {
- tmpU[i] = tmpU[i - 1];
- tmpV[i] = tmpV[i - 1];
- }
- chrUSrcPtr = tmpU;
- chrVSrcPtr = tmpV;
- }
+ const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+ const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+ int16_t *vLumFilter= c->vLumFilter;
+ int16_t *vChrFilter= c->vChrFilter;
if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { //YV12 like
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
+ vLumFilter += dstY * vLumFilterSize;
+ vChrFilter += chrDstY * vChrFilterSize;
+
+// av_assert0(use_mmx_vfilter != (
+// yuv2planeX == yuv2planeX_10BE_c
+// || yuv2planeX == yuv2planeX_10LE_c
+// || yuv2planeX == yuv2planeX_9BE_c
+// || yuv2planeX == yuv2planeX_9LE_c
+// || yuv2planeX == yuv2planeX_16BE_c
+// || yuv2planeX == yuv2planeX_16LE_c
+// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
+
+ if(use_mmx_vfilter){
+ vLumFilter= c->lumMmxFilter;
+ vChrFilter= c->chrMmxFilter;
+ }
+
if (vLumFilterSize == 1) {
yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
} else {
- yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
+ yuv2planeX(vLumFilter, vLumFilterSize,
lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
}
if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
if (yuv2nv12cX) {
- yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
+ yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
} else if (vChrFilterSize == 1) {
yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
} else {
- yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
+ yuv2planeX(vChrFilter, vChrFilterSize,
chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
- yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
- chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
+ yuv2planeX(vChrFilter, vChrFilterSize,
+ chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
}
}
if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
+ if(use_mmx_vfilter){
+ vLumFilter= c->alpMmxFilter;
+ }
if (vLumFilterSize == 1) {
yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
} else {
- yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
+ yuv2planeX(vLumFilter, vLumFilterSize,
alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
}
}
@@ -643,6 +648,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
ff_sws_init_input_funcs(c);
+
if (c->srcBpc == 8) {
if (c->dstBpc <= 10) {
c->hyScale = c->hcScale = hScale8To15_c;
OpenPOWER on IntegriCloud