diff options
Diffstat (limited to 'libavcodec/simple_idct_template.c')
-rw-r--r-- | libavcodec/simple_idct_template.c | 203 |
1 files changed, 124 insertions, 79 deletions
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c index d10df31..35c3132 100644 --- a/libavcodec/simple_idct_template.c +++ b/libavcodec/simple_idct_template.c @@ -3,20 +3,20 @@ * * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -62,22 +62,47 @@ #define MUL(a, b) MUL16(a, b) #define MAC(a, b, c) MAC16(a, b, c) -#elif BIT_DEPTH == 10 - -#define W1 90901 -#define W2 85627 -#define W3 77062 -#define W4 65535 -#define W5 51491 -#define W6 35468 -#define W7 18081 - -#define ROW_SHIFT 15 -#define COL_SHIFT 20 -#define DC_SHIFT 1 - -#define MUL(a, b) ((a) * (b)) -#define MAC(a, b, c) ((a) += (b) * (c)) +#elif BIT_DEPTH == 10 || BIT_DEPTH == 12 + +# if BIT_DEPTH == 10 +#define W1 22725 // 90901 +#define W2 21407 // 85627 +#define W3 19265 // 77062 +#define W4 16384 // 65535 +#define W5 12873 // 51491 +#define W6 8867 // 35468 +#define W7 4520 // 18081 + +# ifdef EXTRA_SHIFT +#define ROW_SHIFT 13 +#define COL_SHIFT 18 +#define DC_SHIFT 1 +# elif IN_IDCT_DEPTH == 32 +#define ROW_SHIFT 13 +#define COL_SHIFT 21 +#define DC_SHIFT 2 +# else +#define ROW_SHIFT 12 +#define COL_SHIFT 19 +#define DC_SHIFT 2 +# endif + +# else +#define W1 45451 +#define W2 42813 +#define W3 38531 +#define W4 32767 +#define W5 25746 +#define W6 17734 +#define W7 9041 + +#define ROW_SHIFT 16 +#define COL_SHIFT 17 +#define DC_SHIFT -1 +# endif + +#define MUL(a, b) ((int)((SUINT)(a) * (b))) +#define MAC(a, b, c) ((a) += (SUINT)(b) * (c)) #else @@ -85,18 +110,24 @@ #endif -static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) +#ifdef EXTRA_SHIFT +static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift) +#else +static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift) +#endif { - int a0, a1, a2, a3, b0, b1, b2, b3; + SUINT a0, a1, a2, a3, b0, b1, b2, b3; +// TODO: Add DC-only support for int32_t input +#if IN_IDCT_DEPTH == 16 #if HAVE_FAST_64BIT #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) { uint64_t temp; - if (DC_SHIFT - extra_shift > 0) { + if (DC_SHIFT - extra_shift >= 0) { temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; } else { - temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff; + temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; } temp += temp * (1 << 16); temp += temp * ((uint64_t) 1 << 32); @@ -110,10 +141,10 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) AV_RN32A(row+6) | row[1])) { uint32_t temp; - if (DC_SHIFT - extra_shift > 0) { + if (DC_SHIFT - extra_shift >= 0) { temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff; } else { - temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff; + temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff; } temp += temp * (1 << 16); AV_WN32A(row, temp); @@ -123,16 +154,17 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) return; } #endif +#endif - a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); + a0 = ((SUINT)W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1)); a1 = a0; a2 = a0; a3 = a0; - a0 += W2 * row[2]; - a1 += W6 * row[2]; - a2 -= W6 * row[2]; - a3 -= W2 * row[2]; + a0 += (SUINT)W2 * row[2]; + a1 += (SUINT)W6 * row[2]; + a2 -= (SUINT)W6 * row[2]; + a3 -= (SUINT)W2 * row[2]; b0 = MUL(W1, row[1]); MAC(b0, W3, row[3]); @@ -143,11 +175,15 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) b3 = MUL(W7, row[1]); MAC(b3, -W5, row[3]); +#if IN_IDCT_DEPTH == 32 + if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) { +#else if (AV_RN64A(row + 4)) { - a0 += W4*row[4] + W6*row[6]; - a1 += - W4*row[4] - W2*row[6]; - a2 += - W4*row[4] + W2*row[6]; - a3 += W4*row[4] - W6*row[6]; +#endif + a0 += (SUINT) W4*row[4] + (SUINT)W6*row[6]; + a1 += (SUINT)- W4*row[4] - (SUINT)W2*row[6]; + a2 += (SUINT)- W4*row[4] + (SUINT)W2*row[6]; + a3 += (SUINT) W4*row[4] - (SUINT)W6*row[6]; MAC(b0, W5, row[5]); MAC(b0, W7, row[7]); @@ -162,26 +198,26 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) MAC(b3, -W1, row[7]); } - row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift); - row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift); - row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift); - row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift); - row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift); - row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift); - row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift); - row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift); + row[0] = (int)(a0 + b0) >> (ROW_SHIFT + extra_shift); + row[7] = (int)(a0 - b0) >> (ROW_SHIFT + extra_shift); + row[1] = (int)(a1 + b1) >> (ROW_SHIFT + extra_shift); + row[6] = (int)(a1 - b1) >> (ROW_SHIFT + extra_shift); + row[2] = (int)(a2 + b2) >> (ROW_SHIFT + extra_shift); + row[5] = (int)(a2 - b2) >> (ROW_SHIFT + extra_shift); + row[3] = (int)(a3 + b3) >> (ROW_SHIFT + extra_shift); + row[4] = (int)(a3 - b3) >> (ROW_SHIFT + extra_shift); } #define IDCT_COLS do { \ - a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \ + a0 = (SUINT)W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \ a1 = a0; \ a2 = a0; \ a3 = a0; \ \ - a0 += W2*col[8*2]; \ - a1 += W6*col[8*2]; \ - a2 += -W6*col[8*2]; \ - a3 += -W2*col[8*2]; \ + a0 += (SUINT) W2*col[8*2]; \ + a1 += (SUINT) W6*col[8*2]; \ + a2 += (SUINT)-W6*col[8*2]; \ + a3 += (SUINT)-W2*col[8*2]; \ \ b0 = MUL(W1, col[8*1]); \ b1 = MUL(W3, col[8*1]); \ @@ -194,10 +230,10 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) MAC(b3, -W5, col[8*3]); \ \ if (col[8*4]) { \ - a0 += W4*col[8*4]; \ - a1 += -W4*col[8*4]; \ - a2 += -W4*col[8*4]; \ - a3 += W4*col[8*4]; \ + a0 += (SUINT) W4*col[8*4]; \ + a1 += (SUINT)-W4*col[8*4]; \ + a2 += (SUINT)-W4*col[8*4]; \ + a3 += (SUINT) W4*col[8*4]; \ } \ \ if (col[8*5]) { \ @@ -208,10 +244,10 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) } \ \ if (col[8*6]) { \ - a0 += W6*col[8*6]; \ - a1 += -W2*col[8*6]; \ - a2 += W2*col[8*6]; \ - a3 += -W6*col[8*6]; \ + a0 += (SUINT) W6*col[8*6]; \ + a1 += (SUINT)-W2*col[8*6]; \ + a2 += (SUINT) W2*col[8*6]; \ + a3 += (SUINT)-W6*col[8*6]; \ } \ \ if (col[8*7]) { \ @@ -222,32 +258,35 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) } \ } while (0) -static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, - int16_t *col) +#ifdef EXTRA_SHIFT +static inline void FUNC(idctSparseCol_extrashift)(int16_t *col) +#else +static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, + idctin *col) { - int a0, a1, a2, a3, b0, b1, b2, b3; + SUINT a0, a1, a2, a3, b0, b1, b2, b3; IDCT_COLS; - dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT); + dest[0] = av_clip_pixel((int)(a0 + b0) >> COL_SHIFT); dest += line_size; - dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT); + dest[0] = av_clip_pixel((int)(a1 + b1) >> COL_SHIFT); dest += line_size; - dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT); + dest[0] = av_clip_pixel((int)(a2 + b2) >> COL_SHIFT); dest += line_size; - dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT); + dest[0] = av_clip_pixel((int)(a3 + b3) >> COL_SHIFT); dest += line_size; - dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT); + dest[0] = av_clip_pixel((int)(a3 - b3) >> COL_SHIFT); dest += line_size; - dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT); + dest[0] = av_clip_pixel((int)(a2 - b2) >> COL_SHIFT); dest += line_size; - dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT); + dest[0] = av_clip_pixel((int)(a1 - b1) >> COL_SHIFT); dest += line_size; - dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT); + dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT); } -static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, - int16_t *col) +static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, + idctin *col) { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -270,7 +309,8 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT)); } -static inline void FUNC(idctSparseCol)(int16_t *col) +static inline void FUNC6(idctSparseCol)(idctin *col) +#endif { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -286,21 +326,24 @@ static inline void FUNC(idctSparseCol)(int16_t *col) col[56] = ((a0 - b0) >> COL_SHIFT); } -void FUNC(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) +#ifndef EXTRA_SHIFT +void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_) { + idctin *block = (idctin *)block_; pixel *dest = (pixel *)dest_; int i; line_size /= sizeof(pixel); for (i = 0; i < 8; i++) - FUNC(idctRowCondDC)(block + i*8, 0); + FUNC6(idctRowCondDC)(block + i*8, 0); for (i = 0; i < 8; i++) - FUNC(idctSparseColPut)(dest + i, line_size, block + i); + FUNC6(idctSparseColPut)(dest + i, line_size, block + i); } -void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) +#if IN_IDCT_DEPTH == 16 +void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) { pixel *dest = (pixel *)dest_; int i; @@ -308,19 +351,21 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *bloc line_size /= sizeof(pixel); for (i = 0; i < 8; i++) - FUNC(idctRowCondDC)(block + i*8, 0); + FUNC6(idctRowCondDC)(block + i*8, 0); for (i = 0; i < 8; i++) - FUNC(idctSparseColAdd)(dest + i, line_size, block + i); + FUNC6(idctSparseColAdd)(dest + i, line_size, block + i); } -void FUNC(ff_simple_idct)(int16_t *block) +void FUNC6(ff_simple_idct)(int16_t *block) { int i; for (i = 0; i < 8; i++) - FUNC(idctRowCondDC)(block + i*8, 0); + FUNC6(idctRowCondDC)(block + i*8, 0); for (i = 0; i < 8; i++) - FUNC(idctSparseCol)(block + i); + FUNC6(idctSparseCol)(block + i); } +#endif +#endif |