diff options
Diffstat (limited to 'libavcodec/vc1dsp.c')
-rw-r--r-- | libavcodec/vc1dsp.c | 192 |
1 files changed, 134 insertions, 58 deletions
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c index a193dd7..73e1001 100644 --- a/libavcodec/vc1dsp.c +++ b/libavcodec/vc1dsp.c @@ -2,20 +2,20 @@ * VC-1 and WMV3 decoder - DSP functions * Copyright (c) 2006 Konstantin Shishkov * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -25,9 +25,12 @@ * */ +#include "libavutil/avassert.h" #include "libavutil/common.h" +#include "libavutil/intreadwrite.h" #include "h264chroma.h" #include "qpeldsp.h" +#include "rnd_avg.h" #include "vc1dsp.h" #include "startcode.h" @@ -582,10 +585,10 @@ static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, } /* Function used to do motion compensation with bicubic interpolation */ -#define VC1_MSPEL_MC(OP, OPNAME) \ +#define VC1_MSPEL_MC(OP, OP4, OPNAME) \ static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, \ const uint8_t *src, \ - int stride, \ + ptrdiff_t stride, \ int hmode, \ int vmode, \ int rnd) \ @@ -640,13 +643,93 @@ static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, \ dst += stride; \ src += stride; \ } \ +}\ +static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst, \ + const uint8_t *src, \ + ptrdiff_t stride, \ + int hmode, \ + int vmode, \ + int rnd) \ +{ \ + int i, j; \ + \ + if (vmode) { /* Horizontal filter to apply */ \ + int r; \ + \ + if (hmode) { /* Vertical filter to apply, output to tmp */ \ + static const int shift_value[] = { 0, 5, 1, 5 }; \ + int shift = (shift_value[hmode] + shift_value[vmode]) >> 1; \ + int16_t tmp[19 * 16], *tptr = tmp; \ + \ + r = (1 << (shift - 1)) + rnd - 1; \ + \ + src -= 1; \ + for (j = 0; j < 16; j++) { \ + for (i = 0; i < 19; i++) \ + tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \ + src += stride; \ + tptr += 19; \ + } \ + \ + r = 64 - rnd; \ + tptr = tmp + 1; \ + for (j = 0; j < 16; j++) { \ + for (i = 0; i < 16; i++) \ + OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \ + dst += stride; \ + tptr += 19; \ + } \ + \ + return; \ + } else { /* No horizontal filter, output 8 lines to dst */ \ + r = 1 - rnd; \ + \ + for (j = 0; j < 16; j++) { \ + for (i = 0; i < 16; i++) \ + OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r)); \ + src += stride; \ + dst += stride; \ + } \ + return; \ + } \ + } \ + \ + /* Horizontal mode with no vertical mode */ \ + for (j = 0; j < 16; j++) { \ + for (i = 0; i < 16; i++) \ + OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd)); \ + dst += stride; \ + src += stride; \ + } \ +}\ +static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\ + int i;\ + for(i=0; i<8; i++){\ + OP4(*(uint32_t*)(block ), AV_RN32(pixels ));\ + OP4(*(uint32_t*)(block+4), AV_RN32(pixels+4));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\ + int i;\ + for(i=0; i<16; i++){\ + OP4(*(uint32_t*)(block ), AV_RN32(pixels ));\ + OP4(*(uint32_t*)(block+ 4), AV_RN32(pixels+ 4));\ + OP4(*(uint32_t*)(block+ 8), AV_RN32(pixels+ 8));\ + OP4(*(uint32_t*)(block+12), AV_RN32(pixels+12));\ + pixels+=line_size;\ + block +=line_size;\ + }\ } #define op_put(a, b) a = av_clip_uint8(b) #define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1 +#define op4_avg(a, b) a = rnd_avg32(a, b) +#define op4_put(a, b) a = b -VC1_MSPEL_MC(op_put, put_) -VC1_MSPEL_MC(op_avg, avg_) +VC1_MSPEL_MC(op_put, op4_put, put_) +VC1_MSPEL_MC(op_avg, op4_avg, avg_) /* pixel functions - really are entry points to vc1_mspel_mc */ @@ -662,6 +745,18 @@ static void avg_vc1_mspel_mc ## a ## b ## _c(uint8_t *dst, \ ptrdiff_t stride, int rnd) \ { \ avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ +} \ +static void put_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst, \ + const uint8_t *src, \ + ptrdiff_t stride, int rnd) \ +{ \ + put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ +} \ +static void avg_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst, \ + const uint8_t *src, \ + ptrdiff_t stride, int rnd) \ +{ \ + avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd); \ } PUT_VC1_MSPEL(1, 0) @@ -683,19 +778,6 @@ PUT_VC1_MSPEL(1, 3) PUT_VC1_MSPEL(2, 3) PUT_VC1_MSPEL(3, 3) - -static void put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd) -{ - ff_put_pixels8x8_c(dst, src, stride); -} - -static void avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int rnd) -{ - ff_avg_pixels8x8_c(dst, src, stride); -} - #define chroma_mc(a) \ ((A * src[a] + B * src[a + 1] + \ C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6) @@ -709,7 +791,7 @@ static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */, const int D = (x) * (y); int i; - assert(x < 8 && y < 8 && x >= 0 && y >= 0); + av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); for (i = 0; i < h; i++) { dst[0] = chroma_mc(0); @@ -734,7 +816,7 @@ static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src, const int D = (x) * (y); int i; - assert(x < 8 && y < 8 && x >= 0 && y >= 0); + av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); for (i = 0; i < h; i++) { dst[0] = chroma_mc(0); @@ -757,7 +839,7 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */, const int D = (x) * (y); int i; - assert(x < 8 && y < 8 && x >= 0 && y >= 0); + av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); for (i = 0; i < h; i++) { dst[0] = avg2(dst[0], chroma_mc(0)); @@ -783,7 +865,7 @@ static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */, const int D = ( x) * ( y); int i; - assert(x < 8 && y < 8 && x >= 0 && y >= 0); + av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0); for (i = 0; i < h; i++) { dst[0] = avg2(dst[0], chroma_mc(0)); @@ -878,6 +960,11 @@ static void sprite_v_double_twoscale_c(uint8_t *dst, } #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */ +#define FN_ASSIGN(X, Y) \ + dsp->put_vc1_mspel_pixels_tab[1][X+4*Y] = put_vc1_mspel_mc##X##Y##_c; \ + dsp->put_vc1_mspel_pixels_tab[0][X+4*Y] = put_vc1_mspel_mc##X##Y##_16_c; \ + dsp->avg_vc1_mspel_pixels_tab[1][X+4*Y] = avg_vc1_mspel_mc##X##Y##_c; \ + dsp->avg_vc1_mspel_pixels_tab[0][X+4*Y] = avg_vc1_mspel_mc##X##Y##_16_c av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) { @@ -902,39 +989,28 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c; dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c; - dsp->put_vc1_mspel_pixels_tab[0] = put_vc1_mspel_mc00_c; - dsp->put_vc1_mspel_pixels_tab[1] = put_vc1_mspel_mc10_c; - dsp->put_vc1_mspel_pixels_tab[2] = put_vc1_mspel_mc20_c; - dsp->put_vc1_mspel_pixels_tab[3] = put_vc1_mspel_mc30_c; - dsp->put_vc1_mspel_pixels_tab[4] = put_vc1_mspel_mc01_c; - dsp->put_vc1_mspel_pixels_tab[5] = put_vc1_mspel_mc11_c; - dsp->put_vc1_mspel_pixels_tab[6] = put_vc1_mspel_mc21_c; - dsp->put_vc1_mspel_pixels_tab[7] = put_vc1_mspel_mc31_c; - dsp->put_vc1_mspel_pixels_tab[8] = put_vc1_mspel_mc02_c; - dsp->put_vc1_mspel_pixels_tab[9] = put_vc1_mspel_mc12_c; - dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c; - dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c; - dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c; - dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c; - dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c; - dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c; - - dsp->avg_vc1_mspel_pixels_tab[0] = avg_vc1_mspel_mc00_c; - dsp->avg_vc1_mspel_pixels_tab[1] = avg_vc1_mspel_mc10_c; - dsp->avg_vc1_mspel_pixels_tab[2] = avg_vc1_mspel_mc20_c; - dsp->avg_vc1_mspel_pixels_tab[3] = avg_vc1_mspel_mc30_c; - dsp->avg_vc1_mspel_pixels_tab[4] = avg_vc1_mspel_mc01_c; - dsp->avg_vc1_mspel_pixels_tab[5] = avg_vc1_mspel_mc11_c; - dsp->avg_vc1_mspel_pixels_tab[6] = avg_vc1_mspel_mc21_c; - dsp->avg_vc1_mspel_pixels_tab[7] = avg_vc1_mspel_mc31_c; - dsp->avg_vc1_mspel_pixels_tab[8] = avg_vc1_mspel_mc02_c; - dsp->avg_vc1_mspel_pixels_tab[9] = avg_vc1_mspel_mc12_c; - dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c; - dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c; - dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c; - dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c; - dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c; - dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c; + dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c; + dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c; + dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c; + dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c; + FN_ASSIGN(0, 1); + FN_ASSIGN(0, 2); + FN_ASSIGN(0, 3); + + FN_ASSIGN(1, 0); + FN_ASSIGN(1, 1); + FN_ASSIGN(1, 2); + FN_ASSIGN(1, 3); + + FN_ASSIGN(2, 0); + FN_ASSIGN(2, 1); + FN_ASSIGN(2, 2); + FN_ASSIGN(2, 3); + + FN_ASSIGN(3, 0); + FN_ASSIGN(3, 1); + FN_ASSIGN(3, 2); + FN_ASSIGN(3, 3); dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c; dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c; |