diff options
Diffstat (limited to 'libavcodec/proresdec.c')
-rw-r--r-- | libavcodec/proresdec.c | 831 |
1 files changed, 362 insertions, 469 deletions
diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c index 83c083f..51807bc 100644 --- a/libavcodec/proresdec.c +++ b/libavcodec/proresdec.c @@ -1,78 +1,44 @@ /* - * Apple ProRes compatible decoder - * * Copyright (c) 2010-2011 Maxim Poliakovski + * Copyright (c) 2010-2011 Elvis Presley * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /** * @file - * This is a decoder for Apple ProRes 422 SD/HQ/LT/Proxy and ProRes 4444. - * It is used for storing and editing high definition video data in Apple's Final Cut Pro. - * - * @see http://wiki.multimedia.cx/index.php?title=Apple_ProRes + * Known FOURCCs: 'apch' (HQ), 'apcn' (SD), 'apcs' (LT), 'acpo' (Proxy), 'ap4h' (4444) */ -#define A32_BITSTREAM_READER // some ProRes vlc codes require up to 28 bits to be read at once +//#define DEBUG -#include <stdint.h> +#define A32_BITSTREAM_READER -#include "libavutil/intmath.h" #include "avcodec.h" -#include "proresdsp.h" #include "get_bits.h" +#include "simple_idct.h" +#include "proresdec.h" -typedef struct { - const uint8_t *index; ///< pointers to the data of this slice - int slice_num; - int x_pos, y_pos; - int slice_width; - DECLARE_ALIGNED(16, DCTELEM, blocks[8 * 4 * 64]); -} ProresThreadData; - -typedef struct { - ProresDSPContext dsp; - AVFrame picture; - ScanTable scantable; - int scantable_type; ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced - - int frame_type; ///< 0 = progressive, 1 = top-field first, 2 = bottom-field first - int pic_format; ///< 2 = 422, 3 = 444 - uint8_t qmat_luma[64]; ///< dequantization matrix for luma - uint8_t qmat_chroma[64]; ///< dequantization matrix for chroma - int qmat_changed; ///< 1 - global quantization matrices changed - int prev_slice_sf; ///< scalefactor of the previous decoded slice - DECLARE_ALIGNED(16, int16_t, qmat_luma_scaled[64]); - DECLARE_ALIGNED(16, int16_t, qmat_chroma_scaled[64]); - int total_slices; ///< total number of slices in a picture - ProresThreadData *slice_data; - int pic_num; - int chroma_factor; - int mb_chroma_factor; - int num_chroma_blocks; ///< number of chrominance blocks in a macroblock - int num_x_slices; - int num_y_slices; - int slice_width_factor; - int slice_height_factor; - int num_x_mbs; - int num_y_mbs; -} ProresContext; - +static void permute(uint8_t *dst, const uint8_t *src, const uint8_t permutation[64]) +{ + int i; + for (i = 0; i < 64; i++) + dst[i] = permutation[src[i]]; +} static const uint8_t progressive_scan[64] = { 0, 1, 8, 9, 2, 3, 10, 11, @@ -93,600 +59,527 @@ static const uint8_t interlaced_scan[64] = { 4, 12, 5, 6, 13, 20, 28, 21, 14, 7, 15, 22, 29, 36, 44, 37, 30, 23, 31, 38, 45, 52, 60, 53, - 46, 39, 47, 54, 61, 62, 55, 63 + 46, 39, 47, 54, 61, 62, 55, 63, }; - static av_cold int decode_init(AVCodecContext *avctx) { ProresContext *ctx = avctx->priv_data; + uint8_t idct_permutation[64]; - ctx->total_slices = 0; - ctx->slice_data = NULL; + avctx->bits_per_raw_sample = 10; - avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format + dsputil_init(&ctx->dsp, avctx); + ff_proresdsp_init(&ctx->prodsp); - avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE; - ff_proresdsp_init(&ctx->dsp); + avctx->coded_frame = &ctx->frame; + ctx->frame.type = FF_I_TYPE; + ctx->frame.key_frame = 1; - avctx->coded_frame = &ctx->picture; - avcodec_get_frame_defaults(&ctx->picture); - ctx->picture.type = AV_PICTURE_TYPE_I; - ctx->picture.key_frame = 1; + ff_init_scantable_permutation(idct_permutation, + ctx->prodsp.idct_permutation_type); - ctx->scantable_type = -1; // set scantable type to uninitialized - memset(ctx->qmat_luma, 4, 64); - memset(ctx->qmat_chroma, 4, 64); - ctx->prev_slice_sf = 0; + permute(ctx->progressive_scan, progressive_scan, idct_permutation); + permute(ctx->interlaced_scan, interlaced_scan, idct_permutation); return 0; } - static int decode_frame_header(ProresContext *ctx, const uint8_t *buf, const int data_size, AVCodecContext *avctx) { - int hdr_size, version, width, height, flags; + int hdr_size, width, height, flags; + int version; const uint8_t *ptr; hdr_size = AV_RB16(buf); + av_dlog(avctx, "header size %d\n", hdr_size); if (hdr_size > data_size) { - av_log(avctx, AV_LOG_ERROR, "frame data too small\n"); - return AVERROR_INVALIDDATA; + av_log(avctx, AV_LOG_ERROR, "error, wrong header size\n"); + return -1; } version = AV_RB16(buf + 2); - if (version >= 2) { - av_log(avctx, AV_LOG_ERROR, - "unsupported header version: %d\n", version); - return AVERROR_INVALIDDATA; + av_dlog(avctx, "%.4s version %d\n", buf+4, version); + if (version > 1) { + av_log(avctx, AV_LOG_ERROR, "unsupported version: %d\n", version); + return -1; } width = AV_RB16(buf + 8); height = AV_RB16(buf + 10); if (width != avctx->width || height != avctx->height) { - av_log(avctx, AV_LOG_ERROR, - "picture dimension changed: old: %d x %d, new: %d x %d\n", + av_log(avctx, AV_LOG_ERROR, "picture resolution change: %dx%d -> %dx%d\n", avctx->width, avctx->height, width, height); - return AVERROR_INVALIDDATA; + return -1; } ctx->frame_type = (buf[12] >> 2) & 3; - if (ctx->frame_type > 2) { - av_log(avctx, AV_LOG_ERROR, - "unsupported frame type: %d\n", ctx->frame_type); - return AVERROR_INVALIDDATA; - } - ctx->chroma_factor = (buf[12] >> 6) & 3; - ctx->mb_chroma_factor = ctx->chroma_factor + 2; - ctx->num_chroma_blocks = (1 << ctx->chroma_factor) >> 1; - switch (ctx->chroma_factor) { - case 2: - avctx->pix_fmt = PIX_FMT_YUV422P10; - break; - case 3: - avctx->pix_fmt = PIX_FMT_YUV444P10; - break; - default: - av_log(avctx, AV_LOG_ERROR, - "unsupported picture format: %d\n", ctx->pic_format); - return AVERROR_INVALIDDATA; - } + av_dlog(avctx, "frame type %d\n", ctx->frame_type); - if (ctx->scantable_type != ctx->frame_type) { - if (!ctx->frame_type) - ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, - progressive_scan); - else - ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, - interlaced_scan); - ctx->scantable_type = ctx->frame_type; + if (ctx->frame_type == 0) { + ctx->scan = ctx->progressive_scan; // permuted + } else { + ctx->scan = ctx->interlaced_scan; // permuted + ctx->frame.interlaced_frame = 1; + ctx->frame.top_field_first = ctx->frame_type == 1; } - if (ctx->frame_type) { /* if interlaced */ - ctx->picture.interlaced_frame = 1; - ctx->picture.top_field_first = ctx->frame_type & 1; - } + avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV422P10; - ctx->qmat_changed = 0; ptr = buf + 20; flags = buf[19]; + av_dlog(avctx, "flags %x\n", flags); + if (flags & 2) { - if (ptr - buf > hdr_size - 64) { - av_log(avctx, AV_LOG_ERROR, "header data too small\n"); - return AVERROR_INVALIDDATA; - } - if (memcmp(ctx->qmat_luma, ptr, 64)) { - memcpy(ctx->qmat_luma, ptr, 64); - ctx->qmat_changed = 1; - } + permute(ctx->qmat_luma, ctx->prodsp.idct_permutation, ptr); ptr += 64; } else { memset(ctx->qmat_luma, 4, 64); - ctx->qmat_changed = 1; } if (flags & 1) { - if (ptr - buf > hdr_size - 64) { - av_log(avctx, AV_LOG_ERROR, "header data too small\n"); - return -1; - } - if (memcmp(ctx->qmat_chroma, ptr, 64)) { - memcpy(ctx->qmat_chroma, ptr, 64); - ctx->qmat_changed = 1; - } + permute(ctx->qmat_chroma, ctx->prodsp.idct_permutation, ptr); } else { memset(ctx->qmat_chroma, 4, 64); - ctx->qmat_changed = 1; } return hdr_size; } - -static int decode_picture_header(ProresContext *ctx, const uint8_t *buf, - const int data_size, AVCodecContext *avctx) +static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, const int buf_size) { - int i, hdr_size, pic_data_size, num_slices; - int slice_width_factor, slice_height_factor; - int remainder, num_x_slices; + ProresContext *ctx = avctx->priv_data; + int i, hdr_size, slice_count; + unsigned pic_data_size; + int log2_slice_mb_width, log2_slice_mb_height; + int slice_mb_count, mb_x, mb_y; const uint8_t *data_ptr, *index_ptr; - hdr_size = data_size > 0 ? buf[0] >> 3 : 0; - if (hdr_size < 8 || hdr_size > data_size) { - av_log(avctx, AV_LOG_ERROR, "picture header too small\n"); - return AVERROR_INVALIDDATA; + hdr_size = buf[0] >> 3; + if (hdr_size < 8 || hdr_size > buf_size) { + av_log(avctx, AV_LOG_ERROR, "error, wrong picture header size\n"); + return -1; } pic_data_size = AV_RB32(buf + 1); - if (pic_data_size > data_size) { - av_log(avctx, AV_LOG_ERROR, "picture data too small\n"); - return AVERROR_INVALIDDATA; + if (pic_data_size > buf_size) { + av_log(avctx, AV_LOG_ERROR, "error, wrong picture data size\n"); + return -1; } - slice_width_factor = buf[7] >> 4; - slice_height_factor = buf[7] & 0xF; - if (slice_width_factor > 3 || slice_height_factor) { - av_log(avctx, AV_LOG_ERROR, - "unsupported slice dimension: %d x %d\n", - 1 << slice_width_factor, 1 << slice_height_factor); - return AVERROR_INVALIDDATA; + log2_slice_mb_width = buf[7] >> 4; + log2_slice_mb_height = buf[7] & 0xF; + if (log2_slice_mb_width > 3 || log2_slice_mb_height) { + av_log(avctx, AV_LOG_ERROR, "unsupported slice resolution: %dx%d\n", + 1 << log2_slice_mb_width, 1 << log2_slice_mb_height); + return -1; } - ctx->slice_width_factor = slice_width_factor; - ctx->slice_height_factor = slice_height_factor; + ctx->mb_width = (avctx->width + 15) >> 4; + if (ctx->frame_type) + ctx->mb_height = (avctx->height + 31) >> 5; + else + ctx->mb_height = (avctx->height + 15) >> 4; - ctx->num_x_mbs = (avctx->width + 15) >> 4; - ctx->num_y_mbs = (avctx->height + - (1 << (4 + ctx->picture.interlaced_frame)) - 1) >> - (4 + ctx->picture.interlaced_frame); + slice_count = AV_RB16(buf + 5); - remainder = ctx->num_x_mbs & ((1 << slice_width_factor) - 1); - num_x_slices = (ctx->num_x_mbs >> slice_width_factor) + (remainder & 1) + - ((remainder >> 1) & 1) + ((remainder >> 2) & 1); - - num_slices = num_x_slices * ctx->num_y_mbs; - if (num_slices != AV_RB16(buf + 5)) { - av_log(avctx, AV_LOG_ERROR, "invalid number of slices\n"); - return AVERROR_INVALIDDATA; - } - - if (ctx->total_slices != num_slices) { - av_freep(&ctx->slice_data); - ctx->slice_data = av_malloc((num_slices + 1) * sizeof(ctx->slice_data[0])); - if (!ctx->slice_data) + if (ctx->slice_count != slice_count || !ctx->slices) { + av_freep(&ctx->slices); + ctx->slices = av_mallocz(slice_count * sizeof(*ctx->slices)); + if (!ctx->slices) return AVERROR(ENOMEM); - ctx->total_slices = num_slices; + ctx->slice_count = slice_count; } - if (hdr_size + num_slices * 2 > data_size) { - av_log(avctx, AV_LOG_ERROR, "slice table too small\n"); - return AVERROR_INVALIDDATA; + if (!slice_count) + return AVERROR(EINVAL); + + if (hdr_size + slice_count*2 > buf_size) { + av_log(avctx, AV_LOG_ERROR, "error, wrong slice count\n"); + return -1; } - /* parse slice table allowing quick access to the slice data */ + // parse slice information index_ptr = buf + hdr_size; - data_ptr = index_ptr + num_slices * 2; + data_ptr = index_ptr + slice_count*2; - for (i = 0; i < num_slices; i++) { - ctx->slice_data[i].index = data_ptr; - data_ptr += AV_RB16(index_ptr + i * 2); - } - ctx->slice_data[i].index = data_ptr; + slice_mb_count = 1 << log2_slice_mb_width; + mb_x = 0; + mb_y = 0; - if (data_ptr > buf + data_size) { - av_log(avctx, AV_LOG_ERROR, "out of slice data\n"); - return -1; - } + for (i = 0; i < slice_count; i++) { + SliceContext *slice = &ctx->slices[i]; - return pic_data_size; -} + slice->data = data_ptr; + data_ptr += AV_RB16(index_ptr + i*2); + while (ctx->mb_width - mb_x < slice_mb_count) + slice_mb_count >>= 1; -/** - * Read an unsigned rice/exp golomb codeword. - */ -static inline int decode_vlc_codeword(GetBitContext *gb, uint8_t codebook) -{ - unsigned int rice_order, exp_order, switch_bits; - unsigned int buf, code; - int log, prefix_len, len; + slice->mb_x = mb_x; + slice->mb_y = mb_y; + slice->mb_count = slice_mb_count; + slice->data_size = data_ptr - slice->data; - OPEN_READER(re, gb); - UPDATE_CACHE(re, gb); - buf = GET_CACHE(re, gb); - - /* number of prefix bits to switch between Rice and expGolomb */ - switch_bits = (codebook & 3) + 1; - rice_order = codebook >> 5; /* rice code order */ - exp_order = (codebook >> 2) & 7; /* exp golomb code order */ - - log = 31 - av_log2(buf); /* count prefix bits (zeroes) */ - - if (log < switch_bits) { /* ok, we got a rice code */ - if (!rice_order) { - /* shortcut for faster decoding of rice codes without remainder */ - code = log; - LAST_SKIP_BITS(re, gb, log + 1); - } else { - prefix_len = log + 1; - code = (log << rice_order) + NEG_USR32(buf << prefix_len, rice_order); - LAST_SKIP_BITS(re, gb, prefix_len + rice_order); + if (slice->data_size < 6) { + av_log(avctx, AV_LOG_ERROR, "error, wrong slice data size\n"); + return -1; + } + + mb_x += slice_mb_count; + if (mb_x == ctx->mb_width) { + slice_mb_count = 1 << log2_slice_mb_width; + mb_x = 0; + mb_y++; + } + if (data_ptr > buf + buf_size) { + av_log(avctx, AV_LOG_ERROR, "error, slice out of bounds\n"); + return -1; } - } else { /* otherwise we got a exp golomb code */ - len = (log << 1) - switch_bits + exp_order + 1; - code = NEG_USR32(buf, len) - (1 << exp_order) + (switch_bits << rice_order); - LAST_SKIP_BITS(re, gb, len); } - CLOSE_READER(re, gb); + if (mb_x || mb_y != ctx->mb_height) { + av_log(avctx, AV_LOG_ERROR, "error wrong mb count y %d h %d\n", + mb_y, ctx->mb_height); + return -1; + } - return code; + return pic_data_size; } -#define LSB2SIGN(x) (-((x) & 1)) -#define TOSIGNED(x) (((x) >> 1) ^ LSB2SIGN(x)) - -#define FIRST_DC_CB 0xB8 // rice_order = 5, exp_golomb_order = 6, switch_bits = 0 - -static uint8_t dc_codebook[4] = { - 0x04, // rice_order = 0, exp_golomb_order = 1, switch_bits = 0 - 0x28, // rice_order = 1, exp_golomb_order = 2, switch_bits = 0 - 0x4D, // rice_order = 2, exp_golomb_order = 3, switch_bits = 1 - 0x70 // rice_order = 3, exp_golomb_order = 4, switch_bits = 0 -}; - - -/** - * Decode DC coefficients for all blocks in a slice. - */ -static inline void decode_dc_coeffs(GetBitContext *gb, DCTELEM *out, - int nblocks) +#define DECODE_CODEWORD(val, codebook) \ + do { \ + unsigned int rice_order, exp_order, switch_bits; \ + unsigned int q, buf, bits; \ + \ + UPDATE_CACHE(re, gb); \ + buf = GET_CACHE(re, gb); \ + \ + /* number of bits to switch between rice and exp golomb */ \ + switch_bits = codebook & 3; \ + rice_order = codebook >> 5; \ + exp_order = (codebook >> 2) & 7; \ + \ + q = 31 - av_log2(buf); \ + \ + if (q > switch_bits) { /* exp golomb */ \ + bits = exp_order - switch_bits + (q<<1); \ + val = SHOW_UBITS(re, gb, bits) - (1 << exp_order) + \ + ((switch_bits + 1) << rice_order); \ + SKIP_BITS(re, gb, bits); \ + } else if (rice_order) { \ + SKIP_BITS(re, gb, q+1); \ + val = (q << rice_order) + SHOW_UBITS(re, gb, rice_order); \ + SKIP_BITS(re, gb, rice_order); \ + } else { \ + val = q; \ + SKIP_BITS(re, gb, q+1); \ + } \ + } while (0); \ + +#define TOSIGNED(x) (((x) >> 1) ^ (-((x) & 1))) + +#define FIRST_DC_CB 0xB8 + +static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70}; + +static av_always_inline void decode_dc_coeffs(GetBitContext *gb, DCTELEM *out, + int blocks_per_slice) { DCTELEM prev_dc; - int i, sign; - int16_t delta; - unsigned int code; + int code, i, sign; - code = decode_vlc_codeword(gb, FIRST_DC_CB); - out[0] = prev_dc = TOSIGNED(code); + OPEN_READER(re, gb); - out += 64; /* move to the DC coeff of the next block */ - delta = 3; + DECODE_CODEWORD(code, FIRST_DC_CB); + prev_dc = TOSIGNED(code); + out[0] = prev_dc; - for (i = 1; i < nblocks; i++, out += 64) { - code = decode_vlc_codeword(gb, dc_codebook[FFMIN(FFABS(delta), 3)]); + out += 64; // dc coeff for the next block - sign = -(((delta >> 15) & 1) ^ (code & 1)); - delta = (((code + 1) >> 1) ^ sign) - sign; - prev_dc += delta; - out[0] = prev_dc; + code = 5; + sign = 0; + for (i = 1; i < blocks_per_slice; i++, out += 64) { + DECODE_CODEWORD(code, dc_codebook[FFMIN(code, 6)]); + if(code) sign ^= -(code & 1); + else sign = 0; + prev_dc += (((code + 1) >> 1) ^ sign) - sign; + out[0] = prev_dc; } + CLOSE_READER(re, gb); } +// adaptive codebook switching lut according to previous run/level values +static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29, 0x29, 0x29, 0x29, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x4C }; +static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28, 0x28, 0x28, 0x28, 0x4C }; -static uint8_t ac_codebook[7] = { - 0x04, // rice_order = 0, exp_golomb_order = 1, switch_bits = 0 - 0x28, // rice_order = 1, exp_golomb_order = 2, switch_bits = 0 - 0x4C, // rice_order = 2, exp_golomb_order = 3, switch_bits = 0 - 0x05, // rice_order = 0, exp_golomb_order = 1, switch_bits = 1 - 0x29, // rice_order = 1, exp_golomb_order = 2, switch_bits = 1 - 0x06, // rice_order = 0, exp_golomb_order = 1, switch_bits = 2 - 0x0A, // rice_order = 0, exp_golomb_order = 2, switch_bits = 2 -}; - -/** - * Lookup tables for adaptive switching between codebooks - * according with previous run/level value. - */ -static uint8_t run_to_cb_index[16] = - { 5, 5, 3, 3, 0, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 2 }; - -static uint8_t lev_to_cb_index[10] = { 0, 6, 3, 5, 0, 1, 1, 1, 1, 2 }; - - -/** - * Decode AC coefficients for all blocks in a slice. - */ -static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out, - int blocks_per_slice, - int plane_size_factor, - const uint8_t *scan) +static av_always_inline void decode_ac_coeffs(AVCodecContext *avctx, GetBitContext *gb, + DCTELEM *out, int blocks_per_slice) { - int pos, block_mask, run, level, sign, run_cb_index, lev_cb_index; - int max_coeffs, bits_left; + ProresContext *ctx = avctx->priv_data; + int block_mask, sign; + unsigned pos, run, level; + int max_coeffs, i, bits_left; + int log2_block_count = av_log2(blocks_per_slice); + + OPEN_READER(re, gb); - /* set initial prediction values */ run = 4; level = 2; - max_coeffs = blocks_per_slice << 6; + max_coeffs = 64 << log2_block_count; block_mask = blocks_per_slice - 1; - for (pos = blocks_per_slice - 1; pos < max_coeffs;) { - run_cb_index = run_to_cb_index[FFMIN(run, 15)]; - lev_cb_index = lev_to_cb_index[FFMIN(level, 9)]; - - bits_left = get_bits_left(gb); - if (bits_left <= 0 || (bits_left <= 8 && !show_bits(gb, bits_left))) - return; - - run = decode_vlc_codeword(gb, ac_codebook[run_cb_index]); + for (pos = block_mask;;) { + bits_left = gb->size_in_bits - (((uint8_t*)re_buffer_ptr - gb->buffer)*8 - 32 + re_bit_count); + if (!bits_left || (bits_left < 32 && !SHOW_UBITS(re, gb, bits_left))) + break; - bits_left = get_bits_left(gb); - if (bits_left <= 0 || (bits_left <= 8 && !show_bits(gb, bits_left))) + DECODE_CODEWORD(run, run_to_cb[FFMIN(run, 15)]); + pos += run + 1; + if (pos >= max_coeffs) { + av_log(avctx, AV_LOG_ERROR, "ac tex damaged %d, %d\n", pos, max_coeffs); return; + } - level = decode_vlc_codeword(gb, ac_codebook[lev_cb_index]) + 1; + DECODE_CODEWORD(level, lev_to_cb[FFMIN(level, 9)]); + level += 1; - pos += run + 1; - if (pos >= max_coeffs) - break; + i = pos >> log2_block_count; - sign = get_sbits(gb, 1); - out[((pos & block_mask) << 6) + scan[pos >> plane_size_factor]] = - (level ^ sign) - sign; + sign = SHOW_SBITS(re, gb, 1); + SKIP_BITS(re, gb, 1); + out[((pos & block_mask) << 6) + ctx->scan[i]] = ((level ^ sign) - sign); } -} + CLOSE_READER(re, gb); +} -/** - * Decode a slice plane (luma or chroma). - */ -static void decode_slice_plane(ProresContext *ctx, ProresThreadData *td, - const uint8_t *buf, - int data_size, uint16_t *out_ptr, - int linesize, int mbs_per_slice, - int blocks_per_mb, int plane_size_factor, - const int16_t *qmat) +static void decode_slice_luma(AVCodecContext *avctx, SliceContext *slice, + uint8_t *dst, int dst_stride, + const uint8_t *buf, unsigned buf_size, + const int16_t *qmat) { + ProresContext *ctx = avctx->priv_data; + LOCAL_ALIGNED_16(DCTELEM, blocks, [8*4*64]); + DCTELEM *block; GetBitContext gb; - DCTELEM *block_ptr; - int mb_num, blocks_per_slice; + int i, blocks_per_slice = slice->mb_count<<2; - blocks_per_slice = mbs_per_slice * blocks_per_mb; + for (i = 0; i < blocks_per_slice; i++) + ctx->dsp.clear_block(blocks+(i<<6)); - memset(td->blocks, 0, 8 * 4 * 64 * sizeof(*td->blocks)); + init_get_bits(&gb, buf, buf_size << 3); - init_get_bits(&gb, buf, data_size << 3); + decode_dc_coeffs(&gb, blocks, blocks_per_slice); + decode_ac_coeffs(avctx, &gb, blocks, blocks_per_slice); - decode_dc_coeffs(&gb, td->blocks, blocks_per_slice); + block = blocks; + for (i = 0; i < slice->mb_count; i++) { + ctx->prodsp.idct_put(dst, dst_stride, block+(0<<6), qmat); + ctx->prodsp.idct_put(dst+16, dst_stride, block+(1<<6), qmat); + ctx->prodsp.idct_put(dst+8*dst_stride, dst_stride, block+(2<<6), qmat); + ctx->prodsp.idct_put(dst+8*dst_stride+16, dst_stride, block+(3<<6), qmat); + block += 4*64; + dst += 32; + } +} - decode_ac_coeffs(&gb, td->blocks, blocks_per_slice, - plane_size_factor, ctx->scantable.permutated); +static void decode_slice_chroma(AVCodecContext *avctx, SliceContext *slice, + uint8_t *dst, int dst_stride, + const uint8_t *buf, unsigned buf_size, + const int16_t *qmat, int log2_blocks_per_mb) +{ + ProresContext *ctx = avctx->priv_data; + LOCAL_ALIGNED_16(DCTELEM, blocks, [8*4*64]); + DCTELEM *block; + GetBitContext gb; + int i, j, blocks_per_slice = slice->mb_count << log2_blocks_per_mb; - /* inverse quantization, inverse transform and output */ - block_ptr = td->blocks; + for (i = 0; i < blocks_per_slice; i++) + ctx->dsp.clear_block(blocks+(i<<6)); - for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) { - ctx->dsp.idct_put(out_ptr, linesize, block_ptr, qmat); - block_ptr += 64; - if (blocks_per_mb > 2) { - ctx->dsp.idct_put(out_ptr + 8, linesize, block_ptr, qmat); - block_ptr += 64; - } - ctx->dsp.idct_put(out_ptr + linesize * 4, linesize, block_ptr, qmat); - block_ptr += 64; - if (blocks_per_mb > 2) { - ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat); - block_ptr += 64; + init_get_bits(&gb, buf, buf_size << 3); + + decode_dc_coeffs(&gb, blocks, blocks_per_slice); + decode_ac_coeffs(avctx, &gb, blocks, blocks_per_slice); + + block = blocks; + for (i = 0; i < slice->mb_count; i++) { + for (j = 0; j < log2_blocks_per_mb; j++) { + ctx->prodsp.idct_put(dst, dst_stride, block+(0<<6), qmat); + ctx->prodsp.idct_put(dst+8*dst_stride, dst_stride, block+(1<<6), qmat); + block += 2*64; + dst += 16; } } } - -static int decode_slice(AVCodecContext *avctx, ProresThreadData *td) +static int decode_slice_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) { ProresContext *ctx = avctx->priv_data; - int mb_x_pos = td->x_pos; - int mb_y_pos = td->y_pos; - int pic_num = ctx->pic_num; - int slice_num = td->slice_num; - int mbs_per_slice = td->slice_width; - const uint8_t *buf; - uint8_t *y_data, *u_data, *v_data; + SliceContext *slice = &ctx->slices[jobnr]; + const uint8_t *buf = slice->data; AVFrame *pic = avctx->coded_frame; - int i, sf, slice_width_factor; - int slice_data_size, hdr_size, y_data_size, u_data_size, v_data_size; - int y_linesize, u_linesize, v_linesize; - - buf = ctx->slice_data[slice_num].index; - slice_data_size = ctx->slice_data[slice_num + 1].index - buf; - - slice_width_factor = av_log2(mbs_per_slice); - - y_data = pic->data[0]; - u_data = pic->data[1]; - v_data = pic->data[2]; - y_linesize = pic->linesize[0]; - u_linesize = pic->linesize[1]; - v_linesize = pic->linesize[2]; - - if (pic->interlaced_frame) { - if (!(pic_num ^ pic->top_field_first)) { - y_data += y_linesize; - u_data += u_linesize; - v_data += v_linesize; - } - y_linesize <<= 1; - u_linesize <<= 1; - v_linesize <<= 1; + int i, hdr_size, qscale, log2_chroma_blocks_per_mb; + int luma_stride, chroma_stride; + int y_data_size, u_data_size, v_data_size; + uint8_t *dest_y, *dest_u, *dest_v; + int16_t qmat_luma_scaled[64]; + int16_t qmat_chroma_scaled[64]; + int mb_x_shift; + + //av_log(avctx, AV_LOG_INFO, "slice %d mb width %d mb x %d y %d\n", + // jobnr, slice->mb_count, slice->mb_x, slice->mb_y); + + // slice header + hdr_size = buf[0] >> 3; + qscale = av_clip(buf[1], 1, 224); + qscale = qscale > 128 ? qscale - 96 << 2: qscale; + y_data_size = AV_RB16(buf + 2); + u_data_size = AV_RB16(buf + 4); + v_data_size = slice->data_size - y_data_size - u_data_size - hdr_size; + if (hdr_size > 7) v_data_size = AV_RB16(buf + 6); + + if (y_data_size < 0 || u_data_size < 0 || v_data_size < 0) { + av_log(avctx, AV_LOG_ERROR, "invalid plane data size\n"); + return -1; } - if (slice_data_size < 6) { - av_log(avctx, AV_LOG_ERROR, "slice data too small\n"); - return AVERROR_INVALIDDATA; + buf += hdr_size; + + for (i = 0; i < 64; i++) { + qmat_luma_scaled [i] = ctx->qmat_luma [i] * qscale; + qmat_chroma_scaled[i] = ctx->qmat_chroma[i] * qscale; } - /* parse slice header */ - hdr_size = buf[0] >> 3; - y_data_size = AV_RB16(buf + 2); - u_data_size = AV_RB16(buf + 4); - v_data_size = slice_data_size - y_data_size - u_data_size - hdr_size; + if (ctx->frame_type == 0) { + luma_stride = pic->linesize[0]; + chroma_stride = pic->linesize[1]; + } else { + luma_stride = pic->linesize[0] << 1; + chroma_stride = pic->linesize[1] << 1; + } - if (v_data_size < 0 || hdr_size < 6) { - av_log(avctx, AV_LOG_ERROR, "invalid data size\n"); - return AVERROR_INVALIDDATA; + if (avctx->pix_fmt == PIX_FMT_YUV444P10) { + mb_x_shift = 5; + log2_chroma_blocks_per_mb = 2; + } else { + mb_x_shift = 4; + log2_chroma_blocks_per_mb = 1; } - sf = av_clip(buf[1], 1, 224); - sf = sf > 128 ? (sf - 96) << 2 : sf; + dest_y = pic->data[0] + (slice->mb_y << 4) * luma_stride + (slice->mb_x << 5); + dest_u = pic->data[1] + (slice->mb_y << 4) * chroma_stride + (slice->mb_x << mb_x_shift); + dest_v = pic->data[2] + (slice->mb_y << 4) * chroma_stride + (slice->mb_x << mb_x_shift); - /* scale quantization matrixes according with slice's scale factor */ - /* TODO: this can be SIMD-optimized alot */ - if (ctx->qmat_changed || sf != ctx->prev_slice_sf) { - ctx->prev_slice_sf = sf; - for (i = 0; i < 64; i++) { - ctx->qmat_luma_scaled[ctx->dsp.idct_permutation[i]] = ctx->qmat_luma[i] * sf; - ctx->qmat_chroma_scaled[ctx->dsp.idct_permutation[i]] = ctx->qmat_chroma[i] * sf; - } + if (ctx->frame_type && ctx->first_field ^ ctx->frame.top_field_first) { + dest_y += pic->linesize[0]; + dest_u += pic->linesize[1]; + dest_v += pic->linesize[2]; } - /* decode luma plane */ - decode_slice_plane(ctx, td, buf + hdr_size, y_data_size, - (uint16_t*) (y_data + (mb_y_pos << 4) * y_linesize + - (mb_x_pos << 5)), y_linesize, - mbs_per_slice, 4, slice_width_factor + 2, - ctx->qmat_luma_scaled); - - /* decode U chroma plane */ - decode_slice_plane(ctx, td, buf + hdr_size + y_data_size, u_data_size, - (uint16_t*) (u_data + (mb_y_pos << 4) * u_linesize + - (mb_x_pos << ctx->mb_chroma_factor)), - u_linesize, mbs_per_slice, ctx->num_chroma_blocks, - slice_width_factor + ctx->chroma_factor - 1, - ctx->qmat_chroma_scaled); - - /* decode V chroma plane */ - decode_slice_plane(ctx, td, buf + hdr_size + y_data_size + u_data_size, - v_data_size, - (uint16_t*) (v_data + (mb_y_pos << 4) * v_linesize + - (mb_x_pos << ctx->mb_chroma_factor)), - v_linesize, mbs_per_slice, ctx->num_chroma_blocks, - slice_width_factor + ctx->chroma_factor - 1, - ctx->qmat_chroma_scaled); + decode_slice_luma(avctx, slice, dest_y, luma_stride, + buf, y_data_size, qmat_luma_scaled); + + if (!(avctx->flags & CODEC_FLAG_GRAY)) { + decode_slice_chroma(avctx, slice, dest_u, chroma_stride, + buf + y_data_size, u_data_size, + qmat_chroma_scaled, log2_chroma_blocks_per_mb); + decode_slice_chroma(avctx, slice, dest_v, chroma_stride, + buf + y_data_size + u_data_size, v_data_size, + qmat_chroma_scaled, log2_chroma_blocks_per_mb); + } return 0; } - -static int decode_picture(ProresContext *ctx, int pic_num, - AVCodecContext *avctx) +static int decode_picture(AVCodecContext *avctx) { - int slice_num, slice_width, x_pos, y_pos; - - slice_num = 0; - - ctx->pic_num = pic_num; - for (y_pos = 0; y_pos < ctx->num_y_mbs; y_pos++) { - slice_width = 1 << ctx->slice_width_factor; - - for (x_pos = 0; x_pos < ctx->num_x_mbs && slice_width; - x_pos += slice_width) { - while (ctx->num_x_mbs - x_pos < slice_width) - slice_width >>= 1; + ProresContext *ctx = avctx->priv_data; + int i, threads_ret[ctx->slice_count]; - ctx->slice_data[slice_num].slice_num = slice_num; - ctx->slice_data[slice_num].x_pos = x_pos; - ctx->slice_data[slice_num].y_pos = y_pos; - ctx->slice_data[slice_num].slice_width = slice_width; + avctx->execute2(avctx, decode_slice_thread, NULL, threads_ret, ctx->slice_count); - slice_num++; - } - } + for (i = 0; i < ctx->slice_count; i++) + if (threads_ret[i] < 0) + return threads_ret[i]; - return avctx->execute(avctx, (void *) decode_slice, - ctx->slice_data, NULL, slice_num, - sizeof(ctx->slice_data[0])); + return 0; } - -#define FRAME_ID MKBETAG('i', 'c', 'p', 'f') -#define MOVE_DATA_PTR(nbytes) buf += (nbytes); buf_size -= (nbytes) - static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt) { ProresContext *ctx = avctx->priv_data; - AVFrame *picture = avctx->coded_frame; + AVFrame *frame = avctx->coded_frame; const uint8_t *buf = avpkt->data; - int buf_size = avpkt->size; - int frame_hdr_size, pic_num, pic_data_size; - - /* check frame atom container */ - if (buf_size < 28 || buf_size < AV_RB32(buf) || - AV_RB32(buf + 4) != FRAME_ID) { - av_log(avctx, AV_LOG_ERROR, "invalid frame\n"); - return AVERROR_INVALIDDATA; + int buf_size = avpkt->size; + int frame_hdr_size, pic_size; + + if (buf_size < 28 || AV_RL32(buf + 4) != AV_RL32("icpf")) { + av_log(avctx, AV_LOG_ERROR, "invalid frame header\n"); + return -1; } - MOVE_DATA_PTR(8); + ctx->first_field = 1; + + buf += 8; + buf_size -= 8; frame_hdr_size = decode_frame_header(ctx, buf, buf_size, avctx); if (frame_hdr_size < 0) - return AVERROR_INVALIDDATA; + return -1; + + buf += frame_hdr_size; + buf_size -= frame_hdr_size; - MOVE_DATA_PTR(frame_hdr_size); + if (frame->data[0]) + avctx->release_buffer(avctx, frame); - if (picture->data[0]) - avctx->release_buffer(avctx, picture); + if (avctx->get_buffer(avctx, frame) < 0) + return -1; - picture->reference = 0; - if (avctx->get_buffer(avctx, picture) < 0) + decode_picture: + pic_size = decode_picture_header(avctx, buf, buf_size); + if (pic_size < 0) { + av_log(avctx, AV_LOG_ERROR, "error decoding picture header\n"); return -1; + } - for (pic_num = 0; ctx->picture.interlaced_frame - pic_num + 1; pic_num++) { - pic_data_size = decode_picture_header(ctx, buf, buf_size, avctx); - if (pic_data_size < 0) - return AVERROR_INVALIDDATA; + if (decode_picture(avctx)) { + av_log(avctx, AV_LOG_ERROR, "error decoding picture\n"); + return -1; + } - if (decode_picture(ctx, pic_num, avctx)) - return -1; + buf += pic_size; + buf_size -= pic_size; - MOVE_DATA_PTR(pic_data_size); + if (ctx->frame_type && buf_size > 0 && ctx->first_field) { + ctx->first_field = 0; + goto decode_picture; } - *data_size = sizeof(AVPicture); - *(AVFrame*) data = *avctx->coded_frame; + *data_size = sizeof(AVFrame); + *(AVFrame*)data = *frame; return avpkt->size; } - static av_cold int decode_close(AVCodecContext *avctx) { ProresContext *ctx = avctx->priv_data; - if (ctx->picture.data[0]) - avctx->release_buffer(avctx, &ctx->picture); - - av_freep(&ctx->slice_data); + AVFrame *frame = avctx->coded_frame; + if (frame->data[0]) + avctx->release_buffer(avctx, frame); + av_freep(&ctx->slices); return 0; } - AVCodec ff_prores_decoder = { .name = "prores", .type = AVMEDIA_TYPE_VIDEO, @@ -695,6 +588,6 @@ AVCodec ff_prores_decoder = { .init = decode_init, .close = decode_close, .decode = decode_frame, + .long_name = NULL_IF_CONFIG_SMALL("ProRes"), .capabilities = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS, - .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)") }; |