summaryrefslogtreecommitdiffstats
path: root/libswscale
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2012-03-07 02:57:53 +0100
committerMichael Niedermayer <michaelni@gmx.at>2012-03-07 03:22:49 +0100
commit6df42f98746be06c883ce683563e07c9a2af983f (patch)
tree6bb893aaf179526515cfb3b1cc933721317dcf6f /libswscale
parent57986c501e8c97d4bd2e1b7ce9e9037c4ae06245 (diff)
parentb5161908e06b4497bf663510fb495ba97a6fd2b5 (diff)
downloadffmpeg-streaming-6df42f98746be06c883ce683563e07c9a2af983f.zip
ffmpeg-streaming-6df42f98746be06c883ce683563e07c9a2af983f.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: SBR DSP: fix SSE code to not use SSE2 instructions. cpu: initialize mask to -1, so that by default, optimizations are used. error_resilience: initialize s->block_index[]. svq3: protect against negative quantizers. Don't use ff_cropTbl[] for IDCT. swscale: make filterPos 32bit. FATE: add CPUFLAGS variable, mapping to -cpuflags avconv option. avconv: add -cpuflags option for setting supported cpuflags. cpu: add av_set_cpu_flags_mask(). libx264: Allow overriding the sliced threads option avconv: fix counting encoded video size. Conflicts: doc/APIchanges doc/fate.texi doc/ffmpeg.texi ffmpeg.c libavcodec/h264idct_template.c libavcodec/svq3.c libavutil/avutil.h libavutil/cpu.c libavutil/cpu.h libswscale/swscale.c tests/Makefile tests/fate-run.sh tests/regression-funcs.sh Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale')
-rw-r--r--libswscale/ppc/swscale_altivec.c2
-rw-r--r--libswscale/swscale.c20
-rw-r--r--libswscale/swscale_internal.h12
-rw-r--r--libswscale/utils.c4
-rw-r--r--libswscale/x86/scale.asm31
-rw-r--r--libswscale/x86/swscale_mmx.c6
-rw-r--r--libswscale/x86/swscale_template.c4
7 files changed, 41 insertions, 38 deletions
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index b6aed04..df8afb0 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -144,7 +144,7 @@ static void yuv2planeX_altivec(const int16_t *filter, int filterSize,
static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
- const int16_t *filterPos, int filterSize)
+ const int32_t *filterPos, int filterSize)
{
register int i;
DECLARE_ALIGNED(16, int, tempo)[4];
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 6dc02b4..38856d9 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -63,7 +63,7 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride,
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
const int16_t *filter,
- const int16_t *filterPos, int filterSize)
+ const int32_t *filterPos, int filterSize)
{
int i;
int32_t *dst = (int32_t *) _dst;
@@ -89,7 +89,7 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
const int16_t *filter,
- const int16_t *filterPos, int filterSize)
+ const int32_t *filterPos, int filterSize)
{
int i;
const uint16_t *src = (const uint16_t *) _src;
@@ -113,7 +113,7 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t
// bilinear / bicubic scaling
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
- const int16_t *filter, const int16_t *filterPos,
+ const int16_t *filter, const int32_t *filterPos,
int filterSize)
{
int i;
@@ -131,7 +131,7 @@ static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *
}
static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
- const int16_t *filter, const int16_t *filterPos,
+ const int16_t *filter, const int32_t *filterPos,
int filterSize)
{
int i;
@@ -234,7 +234,7 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
const uint8_t *src_in[4], int srcW, int xInc,
const int16_t *hLumFilter,
- const int16_t *hLumFilterPos, int hLumFilterSize,
+ const int32_t *hLumFilterPos, int hLumFilterSize,
uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha)
{
@@ -282,7 +282,7 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
const uint8_t *src_in[4],
int srcW, int xInc, const int16_t *hChrFilter,
- const int16_t *hChrFilterPos, int hChrFilterSize,
+ const int32_t *hChrFilterPos, int hChrFilterSize,
uint8_t *formatConvBuffer, uint32_t *pal)
{
const uint8_t *src1 = src_in[1], *src2 = src_in[2];
@@ -326,10 +326,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
const int chrXInc= c->chrXInc;
const enum PixelFormat dstFormat= c->dstFormat;
const int flags= c->flags;
- int16_t *vLumFilterPos= c->vLumFilterPos;
- int16_t *vChrFilterPos= c->vChrFilterPos;
- int16_t *hLumFilterPos= c->hLumFilterPos;
- int16_t *hChrFilterPos= c->hChrFilterPos;
+ int32_t *vLumFilterPos= c->vLumFilterPos;
+ int32_t *vChrFilterPos= c->vChrFilterPos;
+ int32_t *hLumFilterPos= c->hLumFilterPos;
+ int32_t *hChrFilterPos= c->hChrFilterPos;
int16_t *hLumFilter= c->hLumFilter;
int16_t *hChrFilter= c->hChrFilter;
int32_t *lumMmxFilter= c->lumMmxFilter;
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 9304356..e73806e 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -299,10 +299,10 @@ typedef struct SwsContext {
int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
- int16_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
- int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
- int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
- int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
+ int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
+ int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
@@ -515,10 +515,10 @@ typedef struct SwsContext {
/** @{ */
void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
- const int16_t *filterPos, int filterSize);
+ const int32_t *filterPos, int filterSize);
void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW,
const uint8_t *src, const int16_t *filter,
- const int16_t *filterPos, int filterSize);
+ const int32_t *filterPos, int filterSize);
/** @} */
/// Color range conversion function for luma plane if needed.
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 6391d51..7e8285d 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -191,7 +191,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist
dist-1.0);
}
-static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
+static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc,
int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags,
SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
@@ -207,7 +207,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions)
// NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end
- FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(int16_t), fail);
+ FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(**filterPos), fail);
if (FFABS(xInc - 0x10000) <10) { // unscaled
int i;
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index 3c8b6fa..b8cea49 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -38,7 +38,7 @@ SECTION .text
; (SwsContext *c, int{16,32}_t *dst,
; int dstW, const uint{8,16}_t *src,
; const int16_t *filter,
-; const int16_t *filterPos, int filterSize);
+; const int32_t *filterPos, int filterSize);
;
; Scale one horizontal line. Input is either 8-bits width or 16-bits width
; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to
@@ -53,6 +53,9 @@ SECTION .text
cglobal hscale%1to%2_%4_%5, %6, 7, %7
%if ARCH_X86_64
movsxd r2, r2d
+%define mov32 movsxd
+%else ; x86-32
+%define mov32 mov
%endif ; x86-64
%if %2 == 19
%if mmsize == 8 ; mmx
@@ -95,14 +98,14 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
%else ; %2 == 19
lea r1, [r1+r2*(4>>r2shr)]
%endif ; %2 == 15/19
- lea r5, [r5+r2*(2>>r2shr)]
+ lea r5, [r5+r2*(4>>r2shr)]
neg r2
.loop:
%if %3 == 4 ; filterSize == 4 scaling
; load 2x4 or 4x4 source pixels into m0/m1
- movsx r0, word [r5+r2*2+0] ; filterPos[0]
- movsx r6, word [r5+r2*2+2] ; filterPos[1]
+ mov32 r0, dword [r5+r2*4+0] ; filterPos[0]
+ mov32 r6, dword [r5+r2*4+4] ; filterPos[1]
movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}]
%if mmsize == 8
movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}]
@@ -112,8 +115,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
%else ; %1 == 8
movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}]
%endif
- movsx r0, word [r5+r2*2+4] ; filterPos[2]
- movsx r6, word [r5+r2*2+6] ; filterPos[3]
+ mov32 r0, dword [r5+r2*4+8] ; filterPos[2]
+ mov32 r6, dword [r5+r2*4+12] ; filterPos[3]
movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}]
%if %1 > 8
movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}]
@@ -156,8 +159,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
%endif ; mmx/sse2/ssse3/sse4
%else ; %3 == 8, i.e. filterSize == 8 scaling
; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5
- movsx r0, word [r5+r2*1+0] ; filterPos[0]
- movsx r6, word [r5+r2*1+2] ; filterPos[1]
+ mov32 r0, dword [r5+r2*2+0] ; filterPos[0]
+ mov32 r6, dword [r5+r2*2+4] ; filterPos[1]
movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}]
%if mmsize == 8
movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}]
@@ -165,8 +168,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}]
%else ; mmsize == 16
movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}]
- movsx r0, word [r5+r2*1+4] ; filterPos[2]
- movsx r6, word [r5+r2*1+6] ; filterPos[3]
+ mov32 r0, dword [r5+r2*2+8] ; filterPos[2]
+ mov32 r6, dword [r5+r2*2+12] ; filterPos[3]
movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}]
movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}]
%endif ; mmsize == 8/16
@@ -251,7 +254,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
%define r1x r1
%define filter2 r6m
%endif ; x86-32/64
- lea r5, [r5+r2*2]
+ lea r5, [r5+r2*4]
%if %2 == 15
lea r1, [r1+r2*2]
%else ; %2 == 19
@@ -261,8 +264,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
neg r2
.loop:
- movsx r0, word [r5+r2*2+0] ; filterPos[0]
- movsx r1x, word [r5+r2*2+2] ; filterPos[1]
+ mov32 r0, dword [r5+r2*4+0] ; filterPos[0]
+ mov32 r1x, dword [r5+r2*4+4] ; filterPos[1]
; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)?
pxor m4, m4
pxor m5, m5
@@ -293,7 +296,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
jl .innerloop
%ifidn %4, X4
- movsx r1x, word [r5+r2*2+2] ; filterPos[1]
+ mov32 r1x, dword [r5+r2*4+4] ; filterPos[1]
movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0]
sub r1x, r6 ; and first 4 srcpx of dstpx[1]
%if %1 > 8
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
index 28fc004..0c8732c 100644
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -94,8 +94,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
int16_t **alpPixBuf= c->alpPixBuf;
const int vLumBufSize= c->vLumBufSize;
const int vChrBufSize= c->vChrBufSize;
- int16_t *vLumFilterPos= c->vLumFilterPos;
- int16_t *vChrFilterPos= c->vChrFilterPos;
+ int32_t *vLumFilterPos= c->vLumFilterPos;
+ int32_t *vChrFilterPos= c->vChrFilterPos;
int16_t *vLumFilter= c->vLumFilter;
int16_t *vChrFilter= c->vChrFilter;
int32_t *lumMmxFilter= c->lumMmxFilter;
@@ -266,7 +266,7 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt(
SwsContext *c, int16_t *data, \
int dstW, const uint8_t *src, \
const int16_t *filter, \
- const int16_t *filterPos, int filterSize)
+ const int32_t *filterPos, int filterSize)
#define SCALE_FUNCS(filter_n, opt) \
SCALE_FUNC(filter_n, 8, 15, opt); \
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 4eee894..cf8e802 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1450,7 +1450,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
int dstWidth, const uint8_t *src,
int srcW, int xInc)
{
- int16_t *filterPos = c->hLumFilterPos;
+ int32_t *filterPos = c->hLumFilterPos;
int16_t *filter = c->hLumFilter;
void *mmx2FilterCode= c->lumMmx2FilterCode;
int i;
@@ -1546,7 +1546,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
int dstWidth, const uint8_t *src1,
const uint8_t *src2, int srcW, int xInc)
{
- int16_t *filterPos = c->hChrFilterPos;
+ int32_t *filterPos = c->hChrFilterPos;
int16_t *filter = c->hChrFilter;
void *mmx2FilterCode= c->chrMmx2FilterCode;
int i;
OpenPOWER on IntegriCloud