diff options
Diffstat (limited to 'libavcodec/hevcdec.c')
-rw-r--r-- | libavcodec/hevcdec.c | 2354 |
1 files changed, 1391 insertions, 963 deletions
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 130b99f..967f8f1 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -1,25 +1,25 @@ /* - * HEVC video decoder + * HEVC video Decoder * * Copyright (C) 2012 - 2013 Guillaume Martres * Copyright (C) 2012 - 2013 Mickael Raulet * Copyright (C) 2012 - 2013 Gildas Cocherel * Copyright (C) 2012 - 2013 Wassim Hamidouche * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -27,6 +27,7 @@ #include "libavutil/common.h" #include "libavutil/display.h" #include "libavutil/internal.h" +#include "libavutil/mastering_display_metadata.h" #include "libavutil/md5.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" @@ -35,74 +36,15 @@ #include "bswapdsp.h" #include "bytestream.h" #include "cabac_functions.h" -#include "golomb_legacy.h" +#include "golomb.h" #include "hevc.h" #include "hevc_data.h" +#include "hevc_parse.h" #include "hevcdec.h" #include "hwaccel.h" #include "profiles.h" -const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 3 }; -const uint8_t ff_hevc_qpel_extra_after[4] = { 0, 4, 4, 4 }; -const uint8_t ff_hevc_qpel_extra[4] = { 0, 7, 7, 7 }; - -static const uint8_t scan_1x1[1] = { 0 }; - -static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 }; - -static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 }; - -static const uint8_t horiz_scan4x4_x[16] = { - 0, 1, 2, 3, - 0, 1, 2, 3, - 0, 1, 2, 3, - 0, 1, 2, 3, -}; - -static const uint8_t horiz_scan4x4_y[16] = { - 0, 0, 0, 0, - 1, 1, 1, 1, - 2, 2, 2, 2, - 3, 3, 3, 3, -}; - -static const uint8_t horiz_scan8x8_inv[8][8] = { - { 0, 1, 2, 3, 16, 17, 18, 19, }, - { 4, 5, 6, 7, 20, 21, 22, 23, }, - { 8, 9, 10, 11, 24, 25, 26, 27, }, - { 12, 13, 14, 15, 28, 29, 30, 31, }, - { 32, 33, 34, 35, 48, 49, 50, 51, }, - { 36, 37, 38, 39, 52, 53, 54, 55, }, - { 40, 41, 42, 43, 56, 57, 58, 59, }, - { 44, 45, 46, 47, 60, 61, 62, 63, }, -}; - -static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 }; - -static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 }; - -static const uint8_t diag_scan2x2_inv[2][2] = { - { 0, 2, }, - { 1, 3, }, -}; - -static const uint8_t diag_scan4x4_inv[4][4] = { - { 0, 2, 5, 9, }, - { 1, 
4, 8, 12, }, - { 3, 7, 11, 14, }, - { 6, 10, 13, 15, }, -}; - -static const uint8_t diag_scan8x8_inv[8][8] = { - { 0, 2, 5, 9, 14, 20, 27, 35, }, - { 1, 4, 8, 13, 19, 26, 34, 42, }, - { 3, 7, 12, 18, 25, 33, 41, 48, }, - { 6, 11, 17, 24, 32, 40, 47, 53, }, - { 10, 16, 23, 31, 39, 46, 52, 57, }, - { 15, 22, 30, 38, 45, 51, 56, 60, }, - { 21, 29, 37, 44, 50, 55, 59, 62, }, - { 28, 36, 43, 49, 54, 58, 61, 63, }, -}; +const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; /** * NOTE: Each function hls_foo correspond to the function foo in the @@ -133,6 +75,10 @@ static void pic_arrays_free(HEVCContext *s) av_freep(&s->horizontal_bs); av_freep(&s->vertical_bs); + av_freep(&s->sh.entry_point_offset); + av_freep(&s->sh.size); + av_freep(&s->sh.offset); + av_buffer_pool_uninit(&s->tab_mvf_pool); av_buffer_pool_uninit(&s->rpl_tab_pool); } @@ -148,40 +94,40 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) int ctb_count = sps->ctb_width * sps->ctb_height; int min_pu_size = sps->min_pu_width * sps->min_pu_height; - s->bs_width = width >> 3; - s->bs_height = height >> 3; + s->bs_width = (width >> 2) + 1; + s->bs_height = (height >> 2) + 1; s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao)); s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock)); if (!s->sao || !s->deblock) goto fail; - s->skip_flag = av_malloc(pic_size_in_ctb); - s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width); + s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width); + s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width); if (!s->skip_flag || !s->tab_ct_depth) goto fail; - s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height); + s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height); s->tab_ipm = av_mallocz(min_pu_size); - s->is_pcm = av_malloc(min_pu_size); + s->is_pcm = av_malloc_array(sps->min_pu_width + 1, 
sps->min_pu_height + 1); if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm) goto fail; - s->filter_slice_edges = av_malloc(ctb_count); - s->tab_slice_address = av_malloc(pic_size_in_ctb * + s->filter_slice_edges = av_mallocz(ctb_count); + s->tab_slice_address = av_malloc_array(pic_size_in_ctb, sizeof(*s->tab_slice_address)); - s->qp_y_tab = av_malloc(pic_size_in_ctb * + s->qp_y_tab = av_malloc_array(pic_size_in_ctb, sizeof(*s->qp_y_tab)); if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address) goto fail; - s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1)); - s->vertical_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1)); + s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height); + s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height); if (!s->horizontal_bs || !s->vertical_bs) goto fail; s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField), - av_buffer_alloc); + av_buffer_allocz); s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab), av_buffer_allocz); if (!s->tab_mvf_pool || !s->rpl_tab_pool) @@ -194,7 +140,7 @@ fail: return AVERROR(ENOMEM); } -static void pred_weight_table(HEVCContext *s, GetBitContext *gb) +static int pred_weight_table(HEVCContext *s, GetBitContext *gb) { int i = 0; int j = 0; @@ -202,11 +148,21 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) uint8_t chroma_weight_l0_flag[16]; uint8_t luma_weight_l1_flag[16]; uint8_t chroma_weight_l1_flag[16]; + int luma_log2_weight_denom; - s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7); + luma_log2_weight_denom = get_ue_golomb_long(gb); + if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) { + av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom); + return AVERROR_INVALIDDATA; + } + s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3); if (s->ps.sps->chroma_format_idc != 0) { - int delta = get_se_golomb(gb); - 
s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7); + int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb); + if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) { + av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom); + return AVERROR_INVALIDDATA; + } + s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom; } for (i = 0; i < s->sh.nb_refs[L0]; i++) { @@ -216,7 +172,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) s->sh.luma_offset_l0[i] = 0; } } - if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for" + if (s->ps.sps->chroma_format_idc != 0) { for (i = 0; i < s->sh.nb_refs[L0]; i++) chroma_weight_l0_flag[i] = get_bits1(gb); } else { @@ -233,6 +189,12 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) for (j = 0; j < 2; j++) { int delta_chroma_weight_l0 = get_se_golomb(gb); int delta_chroma_offset_l0 = get_se_golomb(gb); + + if ( (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0 + || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) { + return AVERROR_INVALIDDATA; + } + s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0; s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j]) >> s->sh.chroma_log2_weight_denom) + 128), -128, 127); @@ -269,6 +231,12 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) for (j = 0; j < 2; j++) { int delta_chroma_weight_l1 = get_se_golomb(gb); int delta_chroma_offset_l1 = get_se_golomb(gb); + + if ( (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1 + || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) { + return AVERROR_INVALIDDATA; + } + s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1; s->sh.chroma_offset_l1[i][j] = 
av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j]) >> s->sh.chroma_log2_weight_denom) + 128), -128, 127); @@ -281,6 +249,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb) } } } + return 0; } static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) @@ -299,7 +268,9 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) nb_sps = get_ue_golomb_long(gb); nb_sh = get_ue_golomb_long(gb); - if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc)) + if (nb_sps > sps->num_long_term_ref_pics_sps) + return AVERROR_INVALIDDATA; + if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc)) return AVERROR_INVALIDDATA; rps->nb_refs = nb_sh + nb_sps; @@ -322,12 +293,16 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) delta_poc_msb_present = get_bits1(gb); if (delta_poc_msb_present) { - int delta = get_ue_golomb_long(gb); + int64_t delta = get_ue_golomb_long(gb); + int64_t poc; if (i && i != nb_sps) delta += prev_delta_msb; - rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb; + poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb; + if (poc != (int32_t)poc) + return AVERROR_INVALIDDATA; + rps->poc[i] = poc; prev_delta_msb = delta; } } @@ -384,46 +359,75 @@ static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps, static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) { - #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \ - CONFIG_HEVC_VAAPI_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL + \ - CONFIG_HEVC_CUVID_HWACCEL) +#define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \ + CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \ + CONFIG_HEVC_NVDEC_HWACCEL + \ + CONFIG_HEVC_VAAPI_HWACCEL + \ + CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ + CONFIG_HEVC_VDPAU_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; - if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == 
AV_PIX_FMT_YUVJ420P || - sps->pix_fmt == AV_PIX_FMT_YUV420P10) { + switch (sps->pix_fmt) { + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUVJ420P: +#if CONFIG_HEVC_DXVA2_HWACCEL + *fmt++ = AV_PIX_FMT_DXVA2_VLD; +#endif #if CONFIG_HEVC_D3D11VA_HWACCEL *fmt++ = AV_PIX_FMT_D3D11VA_VLD; *fmt++ = AV_PIX_FMT_D3D11; #endif +#if CONFIG_HEVC_VAAPI_HWACCEL + *fmt++ = AV_PIX_FMT_VAAPI; +#endif +#if CONFIG_HEVC_VDPAU_HWACCEL + *fmt++ = AV_PIX_FMT_VDPAU; +#endif +#if CONFIG_HEVC_NVDEC_HWACCEL + *fmt++ = AV_PIX_FMT_CUDA; +#endif +#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif + break; + case AV_PIX_FMT_YUV420P10: #if CONFIG_HEVC_DXVA2_HWACCEL *fmt++ = AV_PIX_FMT_DXVA2_VLD; #endif +#if CONFIG_HEVC_D3D11VA_HWACCEL + *fmt++ = AV_PIX_FMT_D3D11VA_VLD; + *fmt++ = AV_PIX_FMT_D3D11; +#endif #if CONFIG_HEVC_VAAPI_HWACCEL *fmt++ = AV_PIX_FMT_VAAPI; #endif -#if CONFIG_HEVC_CUVID_HWACCEL && HAVE_CUVIDDECODECREATEINFO_BITDEPTHMINUS8 - *fmt++ = AV_PIX_FMT_CUDA; +#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; #endif - } - if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) { -#if CONFIG_HEVC_CUVID_HWACCEL && !HAVE_CUVIDDECODECREATEINFO_BITDEPTHMINUS8 +#if CONFIG_HEVC_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; #endif -#if CONFIG_HEVC_VDPAU_HWACCEL - *fmt++ = AV_PIX_FMT_VDPAU; + break; + case AV_PIX_FMT_YUV420P12: + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUV444P10: + case AV_PIX_FMT_YUV444P12: +#if CONFIG_HEVC_NVDEC_HWACCEL + *fmt++ = AV_PIX_FMT_CUDA; #endif + break; } *fmt++ = sps->pix_fmt; *fmt = AV_PIX_FMT_NONE; - return ff_get_format(s->avctx, pix_fmts); + return ff_thread_get_format(s->avctx, pix_fmts); } static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt) { - int ret; + int ret, i; pic_arrays_free(s); s->ps.sps = NULL; @@ -444,12 +448,25 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps, ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth); ff_videodsp_init (&s->vdsp, 
sps->bit_depth); + for (i = 0; i < 3; i++) { + av_freep(&s->sao_pixel_buffer_h[i]); + av_freep(&s->sao_pixel_buffer_v[i]); + } + if (sps->sao_enabled && !s->avctx->hwaccel) { - av_frame_unref(s->tmp_frame); - ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF); - if (ret < 0) - goto fail; - s->frame = s->tmp_frame; + int c_count = (sps->chroma_format_idc != 0) ? 3 : 1; + int c_idx; + + for(c_idx = 0; c_idx < c_count; c_idx++) { + int w = sps->width >> sps->hshift[c_idx]; + int h = sps->height >> sps->vshift[c_idx]; + s->sao_pixel_buffer_h[c_idx] = + av_malloc((w * 2 * sps->ctb_height) << + sps->pixel_shift); + s->sao_pixel_buffer_v[c_idx] = + av_malloc((h * 2 * sps->ctb_width) << + sps->pixel_shift); + } } s->ps.sps = sps; @@ -465,7 +482,7 @@ fail: static int hls_slice_header(HEVCContext *s) { - GetBitContext *gb = &s->HEVClc.gb; + GetBitContext *gb = &s->HEVClc->gb; SliceHeader *sh = &s->sh; int i, ret; @@ -477,6 +494,7 @@ static int hls_slice_header(HEVCContext *s) if (IS_IDR(s)) ff_hevc_clear_refs(s); } + sh->no_output_of_prior_pics_flag = 0; if (IS_IRAP(s)) sh->no_output_of_prior_pics_flag = get_bits1(gb); @@ -491,11 +509,20 @@ static int hls_slice_header(HEVCContext *s) return AVERROR_INVALIDDATA; } s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data; + if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1) + sh->no_output_of_prior_pics_flag = 1; if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) { const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data; + const HEVCSPS *last_sps = s->ps.sps; enum AVPixelFormat pix_fmt; + if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) { + if (sps->width != last_sps->width || sps->height != last_sps->height || + sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering != + last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering) + sh->no_output_of_prior_pics_flag = 0; + } ff_hevc_clear_refs(s); ret = set_sps(s, sps, 
sps->pix_fmt); @@ -520,7 +547,7 @@ static int hls_slice_header(HEVCContext *s) slice_address_length = av_ceil_log2(s->ps.sps->ctb_width * s->ps.sps->ctb_height); - sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0; + sh->slice_segment_addr = get_bitsz(gb, slice_address_length); if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) { av_log(s->avctx, AV_LOG_ERROR, "Invalid slice segment address: %u.\n", @@ -569,7 +596,7 @@ static int hls_slice_header(HEVCContext *s) int poc, pos; sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb); - poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb); + poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type); if (!sh->first_slice_in_pic_flag && poc != s->poc) { av_log(s->avctx, AV_LOG_WARNING, "Ignoring POC change between slices: %d -> %d\n", s->poc, poc); @@ -620,7 +647,7 @@ static int hls_slice_header(HEVCContext *s) } /* 8.3.1 */ - if (s->temporal_id == 0 && + if (sh->first_slice_in_pic_flag && s->temporal_id == 0 && s->nal_unit_type != HEVC_NAL_TRAIL_N && s->nal_unit_type != HEVC_NAL_TSA_N && s->nal_unit_type != HEVC_NAL_STSA_N && @@ -632,8 +659,10 @@ static int hls_slice_header(HEVCContext *s) if (s->ps.sps->sao_enabled) { sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb); - sh->slice_sample_adaptive_offset_flag[1] = - sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb); + if (s->ps.sps->chroma_format_idc) { + sh->slice_sample_adaptive_offset_flag[1] = + sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb); + } } else { sh->slice_sample_adaptive_offset_flag[0] = 0; sh->slice_sample_adaptive_offset_flag[1] = 0; @@ -709,7 +738,9 @@ static int hls_slice_header(HEVCContext *s) if ((s->ps.pps->weighted_pred_flag && sh->slice_type == HEVC_SLICE_P) || (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) { - pred_weight_table(s, gb); + int ret = pred_weight_table(s, gb); + if (ret < 0) + return 
ret; } sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb); @@ -731,6 +762,11 @@ static int hls_slice_header(HEVCContext *s) sh->slice_cr_qp_offset = 0; } + if (s->ps.pps->chroma_qp_offset_list_enabled_flag) + sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb); + else + sh->cu_chroma_qp_offset_enabled_flag = 0; + if (s->ps.pps->deblocking_filter_control_present_flag) { int deblocking_filter_override_flag = 0; @@ -740,8 +776,17 @@ static int hls_slice_header(HEVCContext *s) if (deblocking_filter_override_flag) { sh->disable_deblocking_filter_flag = get_bits1(gb); if (!sh->disable_deblocking_filter_flag) { - sh->beta_offset = get_se_golomb(gb) * 2; - sh->tc_offset = get_se_golomb(gb) * 2; + int beta_offset_div2 = get_se_golomb(gb); + int tc_offset_div2 = get_se_golomb(gb) ; + if (beta_offset_div2 < -6 || beta_offset_div2 > 6 || + tc_offset_div2 < -6 || tc_offset_div2 > 6) { + av_log(s->avctx, AV_LOG_ERROR, + "Invalid deblock filter offsets: %d, %d\n", + beta_offset_div2, tc_offset_div2); + return AVERROR_INVALIDDATA; + } + sh->beta_offset = beta_offset_div2 * 2; + sh->tc_offset = tc_offset_div2 * 2; } } else { sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf; @@ -769,23 +814,59 @@ static int hls_slice_header(HEVCContext *s) sh->num_entry_point_offsets = 0; if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) { - sh->num_entry_point_offsets = get_ue_golomb_long(gb); + unsigned num_entry_point_offsets = get_ue_golomb_long(gb); + // It would be possible to bound this tighter but this here is simpler + if (num_entry_point_offsets > get_bits_left(gb)) { + av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets); + return AVERROR_INVALIDDATA; + } + + sh->num_entry_point_offsets = num_entry_point_offsets; if (sh->num_entry_point_offsets > 0) { int offset_len = get_ue_golomb_long(gb) + 1; - for (i = 0; i < sh->num_entry_point_offsets; i++) - skip_bits(gb, offset_len); - } + if (offset_len < 1 
|| offset_len > 32) { + sh->num_entry_point_offsets = 0; + av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len); + return AVERROR_INVALIDDATA; + } + + av_freep(&sh->entry_point_offset); + av_freep(&sh->offset); + av_freep(&sh->size); + sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned)); + sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int)); + sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int)); + if (!sh->entry_point_offset || !sh->offset || !sh->size) { + sh->num_entry_point_offsets = 0; + av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n"); + return AVERROR(ENOMEM); + } + for (i = 0; i < sh->num_entry_point_offsets; i++) { + unsigned val = get_bits_long(gb, offset_len); + sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size + } + if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) { + s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here + s->threads_number = 1; + } else + s->enable_parallel_tiles = 0; + } else + s->enable_parallel_tiles = 0; } if (s->ps.pps->slice_header_extension_present_flag) { unsigned int length = get_ue_golomb_long(gb); + if (length*8LL > get_bits_left(gb)) { + av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n"); + return AVERROR_INVALIDDATA; + } for (i = 0; i < length; i++) skip_bits(gb, 8); // slice_header_extension_data_byte } // Inferred parameters - sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta; + sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta; if (sh->slice_qp > 51 || sh->slice_qp < -s->ps.sps->qp_bd_offset) { av_log(s->avctx, AV_LOG_ERROR, @@ -803,13 +884,20 @@ static int hls_slice_header(HEVCContext *s) return AVERROR_INVALIDDATA; } - s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag; + if (get_bits_left(gb) < 0) { + av_log(s->avctx, AV_LOG_ERROR, + "Overread 
slice header by %d bits\n", -get_bits_left(gb)); + return AVERROR_INVALIDDATA; + } + + s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag; if (!s->ps.pps->cu_qp_delta_enabled_flag) - s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset, - 52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset; + s->HEVClc->qp_y = s->sh.slice_qp; s->slice_initialized = 1; + s->HEVClc->tu.cu_qp_offset_cb = 0; + s->HEVClc->tu.cu_qp_offset_cr = 0; return 0; } @@ -830,10 +918,9 @@ do { \ static void hls_sao_param(HEVCContext *s, int rx, int ry) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; int sao_merge_left_flag = 0; int sao_merge_up_flag = 0; - int shift = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10); SAOParams *sao = &CTB(s->sao, rx, ry); int c_idx, i; @@ -849,7 +936,10 @@ static void hls_sao_param(HEVCContext *s, int rx, int ry) } } - for (c_idx = 0; c_idx < 3; c_idx++) { + for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) { + int log2_sao_offset_scale = c_idx == 0 ? 
s->ps.pps->log2_sao_offset_scale_luma : + s->ps.pps->log2_sao_offset_scale_chroma; + if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) { sao->type_idx[c_idx] = SAO_NOT_APPLIED; continue; @@ -885,13 +975,14 @@ static void hls_sao_param(HEVCContext *s, int rx, int ry) // Inferred parameters sao->offset_val[c_idx][0] = 0; for (i = 0; i < 4; i++) { - sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift; + sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i]; if (sao->type_idx[c_idx] == SAO_EDGE) { if (i > 1) sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1]; } else if (sao->offset_sign[c_idx][i]) { sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1]; } + sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale; } } } @@ -899,396 +990,45 @@ static void hls_sao_param(HEVCContext *s, int rx, int ry) #undef SET_SAO #undef CTB -static void hls_residual_coding(HEVCContext *s, int x0, int y0, - int log2_trafo_size, enum ScanType scan_idx, - int c_idx) -{ -#define GET_COORD(offset, n) \ - do { \ - x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n]; \ - y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n]; \ - } while (0) - HEVCLocalContext *lc = &s->HEVClc; - int transform_skip_flag = 0; - - int last_significant_coeff_x, last_significant_coeff_y; - int last_scan_pos; - int n_end; - int num_coeff = 0; - int greater1_ctx = 1; - - int num_last_subset; - int x_cg_last_sig, y_cg_last_sig; - - const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off; - - ptrdiff_t stride = s->frame->linesize[c_idx]; - int hshift = s->ps.sps->hshift[c_idx]; - int vshift = s->ps.sps->vshift[c_idx]; - uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride + - ((x0 >> hshift) << s->ps.sps->pixel_shift)]; - LOCAL_ALIGNED_32(int16_t, coeffs, [MAX_TB_SIZE * MAX_TB_SIZE]); - LOCAL_ALIGNED_8(uint8_t, significant_coeff_group_flag, [8], [8]); - - int trafo_size = 1 << log2_trafo_size; - int i, qp, shift, add, scale, scale_m; - static const uint8_t 
level_scale[] = { 40, 45, 51, 57, 64, 72 }; - const uint8_t *scale_matrix; - uint8_t dc_scale; - - memset(coeffs, 0, sizeof(int16_t) * MAX_TB_SIZE * MAX_TB_SIZE); - memset(significant_coeff_group_flag, 0, sizeof(uint8_t) * 8 * 8); - // Derive QP for dequant - if (!lc->cu.cu_transquant_bypass_flag) { - static const int qp_c[] = { - 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37 - }; - - static const uint8_t rem6[51 + 2 * 6 + 1] = { - 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, - 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, - 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, - }; - - static const uint8_t div6[51 + 2 * 6 + 1] = { - 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, - 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, - }; - int qp_y = lc->qp_y; - - if (c_idx == 0) { - qp = qp_y + s->ps.sps->qp_bd_offset; - } else { - int qp_i, offset; - - if (c_idx == 1) - offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset; - else - offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset; - - qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57); - if (qp_i < 30) - qp = qp_i; - else if (qp_i > 43) - qp = qp_i - 6; - else - qp = qp_c[qp_i - 30]; - - qp += s->ps.sps->qp_bd_offset; - } - - shift = s->ps.sps->bit_depth + log2_trafo_size - 5; - add = 1 << (shift - 1); - scale = level_scale[rem6[qp]] << (div6[qp]); - scale_m = 16; // default when no custom scaling lists. - dc_scale = 16; - - if (s->ps.sps->scaling_list_enable_flag) { - const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ? 
- &s->ps.pps->scaling_list : &s->ps.sps->scaling_list; - int matrix_id = lc->cu.pred_mode != MODE_INTRA; - - if (log2_trafo_size != 5) - matrix_id = 3 * matrix_id + c_idx; - - scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id]; - if (log2_trafo_size >= 4) - dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id]; - } - } - - if (s->ps.pps->transform_skip_enabled_flag && - !lc->cu.cu_transquant_bypass_flag && - log2_trafo_size == 2) { - transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx); - } - - last_significant_coeff_x = - ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size); - last_significant_coeff_y = - ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size); - - if (last_significant_coeff_x > 3) { - int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x); - last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) * - (2 + (last_significant_coeff_x & 1)) + - suffix; - } +static int hls_cross_component_pred(HEVCContext *s, int idx) { + HEVCLocalContext *lc = s->HEVClc; + int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx); - if (last_significant_coeff_y > 3) { - int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y); - last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) * - (2 + (last_significant_coeff_y & 1)) + - suffix; - } - - if (scan_idx == SCAN_VERT) - FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y); - - x_cg_last_sig = last_significant_coeff_x >> 2; - y_cg_last_sig = last_significant_coeff_y >> 2; - - switch (scan_idx) { - case SCAN_DIAG: { - int last_x_c = last_significant_coeff_x & 3; - int last_y_c = last_significant_coeff_y & 3; - - scan_x_off = ff_hevc_diag_scan4x4_x; - scan_y_off = ff_hevc_diag_scan4x4_y; - num_coeff = diag_scan4x4_inv[last_y_c][last_x_c]; - if (trafo_size == 4) { - scan_x_cg = scan_1x1; - scan_y_cg = scan_1x1; - } else if (trafo_size == 8) { - num_coeff 
+= diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4; - scan_x_cg = diag_scan2x2_x; - scan_y_cg = diag_scan2x2_y; - } else if (trafo_size == 16) { - num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4; - scan_x_cg = ff_hevc_diag_scan4x4_x; - scan_y_cg = ff_hevc_diag_scan4x4_y; - } else { // trafo_size == 32 - num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4; - scan_x_cg = ff_hevc_diag_scan8x8_x; - scan_y_cg = ff_hevc_diag_scan8x8_y; - } - break; - } - case SCAN_HORIZ: - scan_x_cg = horiz_scan2x2_x; - scan_y_cg = horiz_scan2x2_y; - scan_x_off = horiz_scan4x4_x; - scan_y_off = horiz_scan4x4_y; - num_coeff = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x]; - break; - default: //SCAN_VERT - scan_x_cg = horiz_scan2x2_y; - scan_y_cg = horiz_scan2x2_x; - scan_x_off = horiz_scan4x4_y; - scan_y_off = horiz_scan4x4_x; - num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y]; - break; + if (log2_res_scale_abs_plus1 != 0) { + int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx); + lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) * + (1 - 2 * res_scale_sign_flag); + } else { + lc->tu.res_scale_val = 0; } - num_coeff++; - num_last_subset = (num_coeff - 1) >> 4; - - for (i = num_last_subset; i >= 0; i--) { - int n, m; - int x_cg, y_cg, x_c, y_c; - int implicit_non_zero_coeff = 0; - int64_t trans_coeff_level; - int prev_sig = 0; - int offset = i << 4; - - uint8_t significant_coeff_flag_idx[16]; - uint8_t nb_significant_coeff_flag = 0; - - x_cg = scan_x_cg[i]; - y_cg = scan_y_cg[i]; - - if (i < num_last_subset && i > 0) { - int ctx_cg = 0; - if (x_cg < (1 << (log2_trafo_size - 2)) - 1) - ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg]; - if (y_cg < (1 << (log2_trafo_size - 2)) - 1) - ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1]; - - significant_coeff_group_flag[x_cg][y_cg] = - ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg); - implicit_non_zero_coeff = 
1; - } else { - significant_coeff_group_flag[x_cg][y_cg] = - ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) || - (x_cg == 0 && y_cg == 0)); - } - last_scan_pos = num_coeff - offset - 1; - if (i == num_last_subset) { - n_end = last_scan_pos - 1; - significant_coeff_flag_idx[0] = last_scan_pos; - nb_significant_coeff_flag = 1; - } else { - n_end = 15; - } - - if (x_cg < ((1 << log2_trafo_size) - 1) >> 2) - prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg]; - if (y_cg < ((1 << log2_trafo_size) - 1) >> 2) - prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1; - - for (n = n_end; n >= 0; n--) { - GET_COORD(offset, n); - - if (significant_coeff_group_flag[x_cg][y_cg] && - (n > 0 || implicit_non_zero_coeff == 0)) { - if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c, - log2_trafo_size, - scan_idx, - prev_sig) == 1) { - significant_coeff_flag_idx[nb_significant_coeff_flag] = n; - nb_significant_coeff_flag++; - implicit_non_zero_coeff = 0; - } - } else { - int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2)); - if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) { - significant_coeff_flag_idx[nb_significant_coeff_flag] = n; - nb_significant_coeff_flag++; - } - } - } - - n_end = nb_significant_coeff_flag; - - if (n_end) { - int first_nz_pos_in_cg = 16; - int last_nz_pos_in_cg = -1; - int c_rice_param = 0; - int first_greater1_coeff_idx = -1; - uint8_t coeff_abs_level_greater1_flag[16] = { 0 }; - uint16_t coeff_sign_flag; - int sum_abs = 0; - int sign_hidden = 0; - - // initialize first elem of coeff_bas_level_greater1_flag - int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0; - - if (!(i == num_last_subset) && greater1_ctx == 0) - ctx_set++; - greater1_ctx = 1; - last_nz_pos_in_cg = significant_coeff_flag_idx[0]; - - for (m = 0; m < (n_end > 8 ? 
8 : n_end); m++) { - int n_idx = significant_coeff_flag_idx[m]; - int inc = (ctx_set << 2) + greater1_ctx; - coeff_abs_level_greater1_flag[n_idx] = - ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc); - if (coeff_abs_level_greater1_flag[n_idx]) { - greater1_ctx = 0; - } else if (greater1_ctx > 0 && greater1_ctx < 3) { - greater1_ctx++; - } - - if (coeff_abs_level_greater1_flag[n_idx] && - first_greater1_coeff_idx == -1) - first_greater1_coeff_idx = n_idx; - } - first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1]; - sign_hidden = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 && - !lc->cu.cu_transquant_bypass_flag; - - if (first_greater1_coeff_idx != -1) { - coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set); - } - if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) { - coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag); - } else { - coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1)); - } - - for (m = 0; m < n_end; m++) { - n = significant_coeff_flag_idx[m]; - GET_COORD(offset, n); - trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n]; - if (trans_coeff_level == ((m < 8) ? - ((n == first_greater1_coeff_idx) ? 
3 : 2) : 1)) { - trans_coeff_level += ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param); - if ((trans_coeff_level) > (3 * (1 << c_rice_param))) - c_rice_param = FFMIN(c_rice_param + 1, 4); - } - if (s->ps.pps->sign_data_hiding_flag && sign_hidden) { - sum_abs += trans_coeff_level; - if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1)) - trans_coeff_level = -trans_coeff_level; - } - if (coeff_sign_flag >> 15) - trans_coeff_level = -trans_coeff_level; - coeff_sign_flag <<= 1; - if (!lc->cu.cu_transquant_bypass_flag) { - if (s->ps.sps->scaling_list_enable_flag) { - if (y_c || x_c || log2_trafo_size < 4) { - int pos; - switch (log2_trafo_size) { - case 3: pos = (y_c << 3) + x_c; break; - case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break; - case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break; - default: pos = (y_c << 2) + x_c; - } - scale_m = scale_matrix[pos]; - } else { - scale_m = dc_scale; - } - } - trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift; - if(trans_coeff_level < 0) { - if((~trans_coeff_level) & 0xFffffffffff8000) - trans_coeff_level = -32768; - } else { - if (trans_coeff_level & 0xffffffffffff8000) - trans_coeff_level = 32767; - } - } - coeffs[y_c * trafo_size + x_c] = trans_coeff_level; - } - } - } - - if (!lc->cu.cu_transquant_bypass_flag) { - if (transform_skip_flag) - s->hevcdsp.dequant(coeffs); - else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && - log2_trafo_size == 2) - s->hevcdsp.transform_4x4_luma(coeffs); - else { - int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y); - if (max_xy == 0) - s->hevcdsp.idct_dc[log2_trafo_size - 2](coeffs); - else { - int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4; - if (max_xy < 4) - col_limit = FFMIN(4, col_limit); - else if (max_xy < 8) - col_limit = FFMIN(8, col_limit); - else if (max_xy < 12) - col_limit = FFMIN(24, col_limit); - s->hevcdsp.idct[log2_trafo_size - 2](coeffs, col_limit); - } - } - } - 
s->hevcdsp.add_residual[log2_trafo_size - 2](dst, coeffs, stride); + return 0; } static int hls_transform_unit(HEVCContext *s, int x0, int y0, int xBase, int yBase, int cb_xBase, int cb_yBase, int log2_cb_size, int log2_trafo_size, - int blk_idx, int cbf_luma, int cbf_cb, int cbf_cr) + int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; + const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1]; + int i; if (lc->cu.pred_mode == MODE_INTRA) { int trafo_size = 1 << log2_trafo_size; ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size); s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0); - if (log2_trafo_size > 2) { - trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1); - ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size); - s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1); - s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2); - } else if (blk_idx == 3) { - trafo_size = trafo_size << s->ps.sps->hshift[1]; - ff_hevc_set_neighbour_available(s, xBase, yBase, - trafo_size, trafo_size); - s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1); - s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2); - } } - if (cbf_luma || cbf_cb || cbf_cr) { + if (cbf_luma || cbf_cb[0] || cbf_cr[0] || + (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) { int scan_idx = SCAN_DIAG; int scan_idx_c = SCAN_DIAG; + int cbf_chroma = cbf_cb[0] || cbf_cr[0] || + (s->ps.sps->chroma_format_idc == 2 && + (cbf_cb[1] || cbf_cr[1])); if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) { lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s); @@ -1308,41 +1048,167 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, return AVERROR_INVALIDDATA; } - ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size); + ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size); + } + + if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma && 
+ !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) { + int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s); + if (cu_chroma_qp_offset_flag) { + int cu_chroma_qp_offset_idx = 0; + if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) { + cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s); + av_log(s->avctx, AV_LOG_ERROR, + "cu_chroma_qp_offset_idx not yet tested.\n"); + } + lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx]; + lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx]; + } else { + lc->tu.cu_qp_offset_cb = 0; + lc->tu.cu_qp_offset_cr = 0; + } + lc->tu.is_cu_chroma_qp_offset_coded = 1; } if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) { - if (lc->tu.cur_intra_pred_mode >= 6 && - lc->tu.cur_intra_pred_mode <= 14) { + if (lc->tu.intra_pred_mode >= 6 && + lc->tu.intra_pred_mode <= 14) { scan_idx = SCAN_VERT; - } else if (lc->tu.cur_intra_pred_mode >= 22 && - lc->tu.cur_intra_pred_mode <= 30) { + } else if (lc->tu.intra_pred_mode >= 22 && + lc->tu.intra_pred_mode <= 30) { scan_idx = SCAN_HORIZ; } - if (lc->pu.intra_pred_mode_c >= 6 && - lc->pu.intra_pred_mode_c <= 14) { + if (lc->tu.intra_pred_mode_c >= 6 && + lc->tu.intra_pred_mode_c <= 14) { scan_idx_c = SCAN_VERT; - } else if (lc->pu.intra_pred_mode_c >= 22 && - lc->pu.intra_pred_mode_c <= 30) { + } else if (lc->tu.intra_pred_mode_c >= 22 && + lc->tu.intra_pred_mode_c <= 30) { scan_idx_c = SCAN_HORIZ; } } + lc->tu.cross_pf = 0; + if (cbf_luma) - hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0); - if (log2_trafo_size > 2) { - if (cbf_cb) - hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1); - if (cbf_cr) - hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2); + ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0); + if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) { + int trafo_size_h = 1 << 
(log2_trafo_size_c + s->ps.sps->hshift[1]); + int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]); + lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma && + (lc->cu.pred_mode == MODE_INTER || + (lc->tu.chroma_mode_c == 4))); + + if (lc->tu.cross_pf) { + hls_cross_component_pred(s, 0); + } + for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) { + if (lc->cu.pred_mode == MODE_INTRA) { + ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v); + s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1); + } + if (cbf_cb[i]) + ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c), + log2_trafo_size_c, scan_idx_c, 1); + else + if (lc->tu.cross_pf) { + ptrdiff_t stride = s->frame->linesize[1]; + int hshift = s->ps.sps->hshift[1]; + int vshift = s->ps.sps->vshift[1]; + int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer; + int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2; + int size = 1 << log2_trafo_size_c; + + uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride + + ((x0 >> hshift) << s->ps.sps->pixel_shift)]; + for (i = 0; i < (size * size); i++) { + coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); + } + s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride); + } + } + + if (lc->tu.cross_pf) { + hls_cross_component_pred(s, 1); + } + for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 
2 : 1); i++) { + if (lc->cu.pred_mode == MODE_INTRA) { + ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v); + s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2); + } + if (cbf_cr[i]) + ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c), + log2_trafo_size_c, scan_idx_c, 2); + else + if (lc->tu.cross_pf) { + ptrdiff_t stride = s->frame->linesize[2]; + int hshift = s->ps.sps->hshift[2]; + int vshift = s->ps.sps->vshift[2]; + int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer; + int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2; + int size = 1 << log2_trafo_size_c; + + uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride + + ((x0 >> hshift) << s->ps.sps->pixel_shift)]; + for (i = 0; i < (size * size); i++) { + coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); + } + s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride); + } + } + } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) { + int trafo_size_h = 1 << (log2_trafo_size + 1); + int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]); + for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) { + if (lc->cu.pred_mode == MODE_INTRA) { + ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size), + trafo_size_h, trafo_size_v); + s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1); + } + if (cbf_cb[i]) + ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size), + log2_trafo_size, scan_idx_c, 1); + } + for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 
2 : 1); i++) { + if (lc->cu.pred_mode == MODE_INTRA) { + ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size), + trafo_size_h, trafo_size_v); + s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2); + } + if (cbf_cr[i]) + ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size), + log2_trafo_size, scan_idx_c, 2); + } + } + } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) { + if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) { + int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]); + int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]); + ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v); + s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1); + s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2); + if (s->ps.sps->chroma_format_idc == 2) { + ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c), + trafo_size_h, trafo_size_v); + s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1); + s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2); + } } else if (blk_idx == 3) { - if (cbf_cb) - hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1); - if (cbf_cr) - hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2); + int trafo_size_h = 1 << (log2_trafo_size + 1); + int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]); + ff_hevc_set_neighbour_available(s, xBase, yBase, + trafo_size_h, trafo_size_v); + s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1); + s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2); + if (s->ps.sps->chroma_format_idc == 2) { + ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)), + trafo_size_h, trafo_size_v); + s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1); + s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase 
+ (1 << (log2_trafo_size)), 2); + } } } + return 0; } @@ -1365,17 +1231,34 @@ static int hls_transform_tree(HEVCContext *s, int x0, int y0, int xBase, int yBase, int cb_xBase, int cb_yBase, int log2_cb_size, int log2_trafo_size, int trafo_depth, int blk_idx, - int cbf_cb, int cbf_cr) + const int *base_cbf_cb, const int *base_cbf_cr) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; uint8_t split_transform_flag; + int cbf_cb[2]; + int cbf_cr[2]; int ret; + cbf_cb[0] = base_cbf_cb[0]; + cbf_cb[1] = base_cbf_cb[1]; + cbf_cr[0] = base_cbf_cr[0]; + cbf_cr[1] = base_cbf_cr[1]; + if (lc->cu.intra_split_flag) { - if (trafo_depth == 1) - lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx]; + if (trafo_depth == 1) { + lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx]; + if (s->ps.sps->chroma_format_idc == 3) { + lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx]; + lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx]; + } else { + lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0]; + lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0]; + } + } } else { - lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0]; + lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0]; + lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0]; + lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0]; } if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size && @@ -1394,14 +1277,21 @@ static int hls_transform_tree(HEVCContext *s, int x0, int y0, inter_split; } - if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb)) - cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth); - else if (log2_trafo_size > 2 || trafo_depth == 0) - cbf_cb = 0; - if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr)) - cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth); - else if (log2_trafo_size > 2 || trafo_depth == 0) - cbf_cr = 0; + if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) { + if (trafo_depth == 0 || cbf_cb[0]) { + cbf_cb[0] = 
ff_hevc_cbf_cb_cr_decode(s, trafo_depth); + if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) { + cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth); + } + } + + if (trafo_depth == 0 || cbf_cr[0]) { + cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth); + if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) { + cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth); + } + } + } if (split_transform_flag) { const int trafo_size_split = 1 << (log2_trafo_size - 1); @@ -1430,8 +1320,10 @@ do { int cbf_luma = 1; if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 || - cbf_cb || cbf_cr) + cbf_cb[0] || cbf_cr[0] || + (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) { cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth); + } ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase, log2_cb_size, log2_trafo_size, @@ -1460,8 +1352,7 @@ do { static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) { - //TODO: non-4:2:0 support - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; GetBitContext gb; int cb_size = 1 << log2_cb_size; ptrdiff_t stride0 = s->frame->linesize[0]; @@ -1471,7 +1362,10 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)]; uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)]; - int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma; + int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth + + (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) + + ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) * + s->ps.sps->pcm.bit_depth_chroma; const uint8_t *pcm = 
skip_bytes(&lc->cc, (length + 7) >> 3); int ret; @@ -1482,38 +1376,23 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) if (ret < 0) return ret; - s->hevcdsp.put_pcm(dst0, stride0, cb_size, &gb, s->ps.sps->pcm.bit_depth); - s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma); - s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma); - return 0; -} - -static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size) -{ - HEVCLocalContext *lc = &s->HEVClc; - int x = ff_hevc_abs_mvd_greater0_flag_decode(s); - int y = ff_hevc_abs_mvd_greater0_flag_decode(s); - - if (x) - x += ff_hevc_abs_mvd_greater1_flag_decode(s); - if (y) - y += ff_hevc_abs_mvd_greater1_flag_decode(s); - - switch (x) { - case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s); break; - case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break; - case 0: lc->pu.mvd.x = 0; break; + s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth); + if (s->ps.sps->chroma_format_idc) { + s->hevcdsp.put_pcm(dst1, stride1, + cb_size >> s->ps.sps->hshift[1], + cb_size >> s->ps.sps->vshift[1], + &gb, s->ps.sps->pcm.bit_depth_chroma); + s->hevcdsp.put_pcm(dst2, stride2, + cb_size >> s->ps.sps->hshift[2], + cb_size >> s->ps.sps->vshift[2], + &gb, s->ps.sps->pcm.bit_depth_chroma); } - switch (y) { - case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s); break; - case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break; - case 0: lc->pu.mvd.y = 0; break; - } + return 0; } /** - * 8.5.3.2.2.1 Luma sample interpolation process + * 8.5.3.2.2.1 Luma sample unidirectional interpolation process * * @param s HEVC decoding context * @param dst target buffer for block data at block position @@ -1524,49 +1403,147 @@ static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size) * @param y_off vertical position of block from origin (0, 0) * @param block_w width of block * @param block_h height of 
block + * @param luma_weight weighting factor applied to the luma prediction + * @param luma_offset additive offset applied to the luma prediction value */ -static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride, - AVFrame *ref, const Mv *mv, int x_off, int y_off, - int block_w, int block_h, int pred_idx) + +static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, + AVFrame *ref, const Mv *mv, int x_off, int y_off, + int block_w, int block_h, int luma_weight, int luma_offset) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; uint8_t *src = ref->data[0]; ptrdiff_t srcstride = ref->linesize[0]; int pic_width = s->ps.sps->width; int pic_height = s->ps.sps->height; - - int mx = mv->x & 3; - int my = mv->y & 3; - int extra_left = ff_hevc_qpel_extra_before[mx]; - int extra_top = ff_hevc_qpel_extra_before[my]; + int mx = mv->x & 3; + int my = mv->y & 3; + int weight_flag = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) || + (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag); + int idx = ff_hevc_pel_weight[block_w]; x_off += mv->x >> 2; y_off += mv->y >> 2; src += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift)); - if (x_off < extra_left || y_off < extra_top || - x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] || - y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) { + if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER || + x_off >= pic_width - block_w - QPEL_EXTRA_AFTER || + y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) { const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; - int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift); - int buf_offset = extra_top * - edge_emu_stride + (extra_left << s->ps.sps->pixel_shift); + int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + 
(QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset, edge_emu_stride, srcstride, - block_w + ff_hevc_qpel_extra[mx], - block_h + ff_hevc_qpel_extra[my], - x_off - extra_left, y_off - extra_top, + block_w + QPEL_EXTRA, + block_h + QPEL_EXTRA, + x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE, pic_width, pic_height); src = lc->edge_emu_buffer + buf_offset; srcstride = edge_emu_stride; } - s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride, - block_h, mx, my, lc->mc_buffer); + + if (!weight_flag) + s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride, + block_h, mx, my, block_w); + else + s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride, + block_h, s->sh.luma_log2_weight_denom, + luma_weight, luma_offset, mx, my, block_w); } /** - * 8.5.3.2.2.2 Chroma sample interpolation process + * 8.5.3.2.2.1 Luma sample bidirectional interpolation process + * + * @param s HEVC decoding context + * @param dst target buffer for block data at block position + * @param dststride stride of the dst buffer + * @param ref0 reference picture0 buffer at origin (0, 0) + * @param mv0 motion vector0 (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block + * @param block_h height of block + * @param ref1 reference picture1 buffer at origin (0, 0) + * @param mv1 motion vector1 (relative to block position) to get pixel data from + * @param current_mv current motion vector structure + */ + static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, + AVFrame *ref0, const Mv *mv0, int x_off, int y_off, + int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) +{ + HEVCLocalContext *lc = s->HEVClc; + ptrdiff_t src0stride = ref0->linesize[0]; + ptrdiff_t src1stride 
= ref1->linesize[0]; + int pic_width = s->ps.sps->width; + int pic_height = s->ps.sps->height; + int mx0 = mv0->x & 3; + int my0 = mv0->y & 3; + int mx1 = mv1->x & 3; + int my1 = mv1->y & 3; + int weight_flag = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) || + (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag); + int x_off0 = x_off + (mv0->x >> 2); + int y_off0 = y_off + (mv0->y >> 2); + int x_off1 = x_off + (mv1->x >> 2); + int y_off1 = y_off + (mv1->y >> 2); + int idx = ff_hevc_pel_weight[block_w]; + + uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift); + uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift); + + if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER || + x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER || + y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) { + const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + + s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset, + edge_emu_stride, src0stride, + block_w + QPEL_EXTRA, + block_h + QPEL_EXTRA, + x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE, + pic_width, pic_height); + src0 = lc->edge_emu_buffer + buf_offset; + src0stride = edge_emu_stride; + } + + if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER || + x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER || + y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) { + const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift); + + 
s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset, + edge_emu_stride, src1stride, + block_w + QPEL_EXTRA, + block_h + QPEL_EXTRA, + x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE, + pic_width, pic_height); + src1 = lc->edge_emu_buffer2 + buf_offset; + src1stride = edge_emu_stride; + } + + s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride, + block_h, mx0, my0, block_w); + if (!weight_flag) + s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp, + block_h, mx1, my1, block_w); + else + s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp, + block_h, s->sh.luma_log2_weight_denom, + s->sh.luma_weight_l0[current_mv->ref_idx[0]], + s->sh.luma_weight_l1[current_mv->ref_idx[1]], + s->sh.luma_offset_l0[current_mv->ref_idx[0]], + s->sh.luma_offset_l1[current_mv->ref_idx[1]], + mx1, my1, block_w); + +} + +/** + * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process * * @param s HEVC decoding context * @param dst1 target buffer for block data at block position (U plane) @@ -1578,85 +1555,185 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride, * @param y_off vertical position of block from origin (0, 0) * @param block_w width of block * @param block_h height of block + * @param chroma_weight weighting factor applied to the chroma prediction + * @param chroma_offset additive offset applied to the chroma prediction value */ -static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2, - ptrdiff_t dststride, AVFrame *ref, const Mv *mv, - int x_off, int y_off, int block_w, int block_h, int pred_idx) + +static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, + ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist, + int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset) { - HEVCLocalContext *lc = &s->HEVClc; - uint8_t *src1 = ref->data[1]; - uint8_t *src2 = 
ref->data[2]; - ptrdiff_t src1stride = ref->linesize[1]; - ptrdiff_t src2stride = ref->linesize[2]; - int pic_width = s->ps.sps->width >> 1; - int pic_height = s->ps.sps->height >> 1; - - int mx = mv->x & 7; - int my = mv->y & 7; - - x_off += mv->x >> 3; - y_off += mv->y >> 3; - src1 += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift)); - src2 += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift)); + HEVCLocalContext *lc = s->HEVClc; + int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; + int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; + const Mv *mv = &current_mv->mv[reflist]; + int weight_flag = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) || + (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag); + int idx = ff_hevc_pel_weight[block_w]; + int hshift = s->ps.sps->hshift[1]; + int vshift = s->ps.sps->vshift[1]; + intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift); + intptr_t my = av_mod_uintp2(mv->y, 2 + vshift); + intptr_t _mx = mx << (1 - hshift); + intptr_t _my = my << (1 - vshift); + + x_off += mv->x >> (2 + hshift); + y_off += mv->y >> (2 + vshift); + src0 += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift)); if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER || x_off >= pic_width - block_w - EPEL_EXTRA_AFTER || y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) { - const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift)); + int buf_offset0 = EPEL_EXTRA_BEFORE * + (edge_emu_stride + (1 << s->ps.sps->pixel_shift)); + s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0, + edge_emu_stride, srcstride, + block_w + EPEL_EXTRA, block_h + EPEL_EXTRA, + x_off - EPEL_EXTRA_BEFORE, + y_off - EPEL_EXTRA_BEFORE, + pic_width, pic_height); + + src0 = lc->edge_emu_buffer + buf_offset0; + srcstride =
edge_emu_stride; + } + if (!weight_flag) + s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride, + block_h, _mx, _my, block_w); + else + s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride, + block_h, s->sh.chroma_log2_weight_denom, + chroma_weight, chroma_offset, _mx, _my, block_w); +} + +/** + * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process + * + * @param s HEVC decoding context + * @param dst target buffer for block data at block position + * @param dststride stride of the dst buffer + * @param ref0 reference picture0 buffer at origin (0, 0) + * @param mv0 motion vector0 (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block + * @param block_h height of block + * @param ref1 reference picture1 buffer at origin (0, 0) + * @param mv1 motion vector1 (relative to block position) to get pixel data from + * @param current_mv current motion vector structure + * @param cidx chroma component(cb, cr) + */ +static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1, + int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx) +{ + HEVCLocalContext *lc = s->HEVClc; + uint8_t *src1 = ref0->data[cidx+1]; + uint8_t *src2 = ref1->data[cidx+1]; + ptrdiff_t src1stride = ref0->linesize[cidx+1]; + ptrdiff_t src2stride = ref1->linesize[cidx+1]; + int weight_flag = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) || + (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag); + int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; + int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; + Mv *mv0 = &current_mv->mv[0]; + Mv *mv1 = &current_mv->mv[1]; + int hshift = s->ps.sps->hshift[1]; + int vshift = s->ps.sps->vshift[1]; + + intptr_t mx0 =
av_mod_uintp2(mv0->x, 2 + hshift); + intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift); + intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift); + intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift); + intptr_t _mx0 = mx0 << (1 - hshift); + intptr_t _my0 = my0 << (1 - vshift); + intptr_t _mx1 = mx1 << (1 - hshift); + intptr_t _my1 = my1 << (1 - vshift); + + int x_off0 = x_off + (mv0->x >> (2 + hshift)); + int y_off0 = y_off + (mv0->y >> (2 + vshift)); + int x_off1 = x_off + (mv1->x >> (2 + hshift)); + int y_off1 = y_off + (mv1->y >> (2 + vshift)); + int idx = ff_hevc_pel_weight[block_w]; + src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift); + src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift); + + if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER || + x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER || + y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) { + const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift)); int buf_offset1 = EPEL_EXTRA_BEFORE * (edge_emu_stride + (1 << s->ps.sps->pixel_shift)); - int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift)); - int buf_offset2 = EPEL_EXTRA_BEFORE * - (edge_emu_stride + (1 << s->ps.sps->pixel_shift)); s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1, edge_emu_stride, src1stride, block_w + EPEL_EXTRA, block_h + EPEL_EXTRA, - x_off - EPEL_EXTRA_BEFORE, - y_off - EPEL_EXTRA_BEFORE, + x_off0 - EPEL_EXTRA_BEFORE, + y_off0 - EPEL_EXTRA_BEFORE, pic_width, pic_height); src1 = lc->edge_emu_buffer + buf_offset1; src1stride = edge_emu_stride; - s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride, - block_h, mx, my, lc->mc_buffer); + } + + if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER || + x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER || + y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) { 
+ const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift; + int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift)); + int buf_offset1 = EPEL_EXTRA_BEFORE * + (edge_emu_stride + (1 << s->ps.sps->pixel_shift)); - s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2, + s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1, edge_emu_stride, src2stride, block_w + EPEL_EXTRA, block_h + EPEL_EXTRA, - x_off - EPEL_EXTRA_BEFORE, - y_off - EPEL_EXTRA_BEFORE, + x_off1 - EPEL_EXTRA_BEFORE, + y_off1 - EPEL_EXTRA_BEFORE, pic_width, pic_height); - src2 = lc->edge_emu_buffer + buf_offset2; - src2stride = edge_emu_stride; - s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride, - block_h, mx, my, lc->mc_buffer); - } else { - s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride, - block_h, mx, my, lc->mc_buffer); - s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride, - block_h, mx, my, lc->mc_buffer); + src2 = lc->edge_emu_buffer2 + buf_offset1; + src2stride = edge_emu_stride; } + + s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride, + block_h, _mx0, _my0, block_w); + if (!weight_flag) + s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1], + src2, src2stride, lc->tmp, + block_h, _mx1, _my1, block_w); + else + s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1], + src2, src2stride, lc->tmp, + block_h, + s->sh.chroma_log2_weight_denom, + s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx], + s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx], + s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx], + s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx], + _mx1, _my1, block_w); } static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref, const Mv *mv, int y0, int height) { - int y = (mv->y >> 2) + y0 + height + 9; - ff_thread_await_progress(&ref->tf, y, 0); + 
if (s->threads_type == FF_THREAD_FRAME ) { + int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9); + + ff_thread_await_progress(&ref->tf, y, 0); + } } -static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW, +static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW, int nPbH, int log2_cb_size, int part_idx, int merge_idx, MvField *mv) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; enum InterPredIdc inter_pred_idc = PRED_L0; int mvp_flag; ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH); + mv->pred_flag = 0; if (s->sh.slice_type == HEVC_SLICE_B) inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH); @@ -1664,8 +1741,8 @@ static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW, if (s->sh.nb_refs[L0]) mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]); - mv->pred_flag[0] = 1; - hls_mvd_coding(s, x0, y0, 0); + mv->pred_flag = PF_L0; + ff_hevc_hls_mvd_coding(s, x0, y0, 0); mvp_flag = ff_hevc_mvp_lx_flag_decode(s); ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, part_idx, merge_idx, mv, mvp_flag, 0); @@ -1680,10 +1757,10 @@ static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW, if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) { AV_ZERO32(&lc->pu.mvd); } else { - hls_mvd_coding(s, x0, y0, 1); + ff_hevc_hls_mvd_coding(s, x0, y0, 1); } - mv->pred_flag[1] = 1; + mv->pred_flag += PF_L1; mvp_flag = ff_hevc_mvp_lx_flag_decode(s); ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, part_idx, merge_idx, mv, mvp_flag, 1); @@ -1694,30 +1771,20 @@ static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW, static void hls_prediction_unit(HEVCContext *s, int x0, int y0, int nPbW, int nPbH, - int log2_cb_size, int partIdx) + int log2_cb_size, int partIdx, int idx) { - static const int pred_indices[] = { - [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7, - }; - const int 
pred_idx = pred_indices[nPbW]; - #define POS(c_idx, x, y) \ &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \ (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)] - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; int merge_idx = 0; struct MvField current_mv = {{{ 0 }}}; int min_pu_width = s->ps.sps->min_pu_width; - int weighted_pred = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) || - (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag); MvField *tab_mvf = s->ref->tab_mvf; RefPicList *refPicList = s->ref->refPicList; - HEVCFrame *ref0, *ref1; - - ptrdiff_t tmpstride = MAX_PB_SIZE * sizeof(int16_t); - + HEVCFrame *ref0 = NULL, *ref1 = NULL; uint8_t *dst0 = POS(0, x0, y0); uint8_t *dst1 = POS(1, x0, y0); uint8_t *dst2 = POS(2, x0, y0); @@ -1742,7 +1809,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, partIdx, merge_idx, ¤t_mv); } else { - hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, + hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size, partIdx, merge_idx, ¤t_mv); } @@ -1753,133 +1820,74 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++) tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv; - if (current_mv.pred_flag[0]) { + if (current_mv.pred_flag & PF_L0) { ref0 = refPicList[0].ref[current_mv.ref_idx[0]]; if (!ref0) return; hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH); } - if (current_mv.pred_flag[1]) { + if (current_mv.pred_flag & PF_L1) { ref1 = refPicList[1].ref[current_mv.ref_idx[1]]; if (!ref1) return; hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH); } - if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) { - LOCAL_ALIGNED_16(int16_t, tmp, [MAX_PB_SIZE * MAX_PB_SIZE]); - LOCAL_ALIGNED_16(int16_t, tmp2, [MAX_PB_SIZE * MAX_PB_SIZE]); + if 
(current_mv.pred_flag == PF_L0) { + int x0_c = x0 >> s->ps.sps->hshift[1]; + int y0_c = y0 >> s->ps.sps->vshift[1]; + int nPbW_c = nPbW >> s->ps.sps->hshift[1]; + int nPbH_c = nPbH >> s->ps.sps->vshift[1]; - luma_mc(s, tmp, tmpstride, ref0->frame, - ¤t_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx); + luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame, + ¤t_mv.mv[0], x0, y0, nPbW, nPbH, + s->sh.luma_weight_l0[current_mv.ref_idx[0]], + s->sh.luma_offset_l0[current_mv.ref_idx[0]]); - if (weighted_pred) { - s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom, - s->sh.luma_weight_l0[current_mv.ref_idx[0]], - s->sh.luma_offset_l0[current_mv.ref_idx[0]], - dst0, s->frame->linesize[0], tmp, - tmpstride, nPbH); - } else { - s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH); - } - chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame, - ¤t_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx); - - if (weighted_pred) { - s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], - s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0], - dst1, s->frame->linesize[1], tmp, tmpstride, - nPbH / 2); - s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], - s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1], - dst2, s->frame->linesize[2], tmp2, tmpstride, - nPbH / 2); - } else { - s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmpstride, nPbH / 2); - s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2); - } - } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) { - LOCAL_ALIGNED_16(int16_t, tmp, [MAX_PB_SIZE * MAX_PB_SIZE]); - LOCAL_ALIGNED_16(int16_t, tmp2, [MAX_PB_SIZE * MAX_PB_SIZE]); - - luma_mc(s, tmp, tmpstride, ref1->frame, - ¤t_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx); - - if (weighted_pred) { - 
s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom, - s->sh.luma_weight_l1[current_mv.ref_idx[1]], - s->sh.luma_offset_l1[current_mv.ref_idx[1]], - dst0, s->frame->linesize[0], tmp, tmpstride, - nPbH); - } else { - s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH); + if (s->ps.sps->chroma_format_idc) { + chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1], + 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, + s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]); + chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2], + 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, + s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]); } + } else if (current_mv.pred_flag == PF_L1) { + int x0_c = x0 >> s->ps.sps->hshift[1]; + int y0_c = y0 >> s->ps.sps->vshift[1]; + int nPbW_c = nPbW >> s->ps.sps->hshift[1]; + int nPbH_c = nPbH >> s->ps.sps->vshift[1]; - chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame, - ¤t_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx); + luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame, + ¤t_mv.mv[1], x0, y0, nPbW, nPbH, + s->sh.luma_weight_l1[current_mv.ref_idx[1]], + s->sh.luma_offset_l1[current_mv.ref_idx[1]]); - if (weighted_pred) { - s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], - s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0], - dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2); - s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], - s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1], - dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2); - } else { - s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmpstride, nPbH / 2); - 
s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2); - } - } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) { - LOCAL_ALIGNED_16(int16_t, tmp, [MAX_PB_SIZE * MAX_PB_SIZE]); - LOCAL_ALIGNED_16(int16_t, tmp2, [MAX_PB_SIZE * MAX_PB_SIZE]); - LOCAL_ALIGNED_16(int16_t, tmp3, [MAX_PB_SIZE * MAX_PB_SIZE]); - LOCAL_ALIGNED_16(int16_t, tmp4, [MAX_PB_SIZE * MAX_PB_SIZE]); - - luma_mc(s, tmp, tmpstride, ref0->frame, - ¤t_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx); - luma_mc(s, tmp2, tmpstride, ref1->frame, - ¤t_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx); - - if (weighted_pred) { - s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom, - s->sh.luma_weight_l0[current_mv.ref_idx[0]], - s->sh.luma_weight_l1[current_mv.ref_idx[1]], - s->sh.luma_offset_l0[current_mv.ref_idx[0]], - s->sh.luma_offset_l1[current_mv.ref_idx[1]], - dst0, s->frame->linesize[0], - tmp, tmp2, tmpstride, nPbH); - } else { - s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0], - tmp, tmp2, tmpstride, nPbH); - } - - chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame, - ¤t_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx); - chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame, - ¤t_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx); - - if (weighted_pred) { - s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], - s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], - s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0], - s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0], - dst1, s->frame->linesize[1], tmp, tmp3, - tmpstride, nPbH / 2); - s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom, - s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], - s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], - s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1], - s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1], - dst2, 
s->frame->linesize[2], tmp2, tmp4, - tmpstride, nPbH / 2); - } else { - s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbH/2); - s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2); + if (s->ps.sps->chroma_format_idc) { + chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1], + 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, + s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]); + + chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2], + 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, + s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]); + } + } else if (current_mv.pred_flag == PF_BI) { + int x0_c = x0 >> s->ps.sps->hshift[1]; + int y0_c = y0 >> s->ps.sps->vshift[1]; + int nPbW_c = nPbW >> s->ps.sps->hshift[1]; + int nPbH_c = nPbH >> s->ps.sps->vshift[1]; + + luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame, + ¤t_mv.mv[0], x0, y0, nPbW, nPbH, + ref1->frame, ¤t_mv.mv[1], ¤t_mv); + + if (s->ps.sps->chroma_format_idc) { + chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame, + x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0); + + chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame, + x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1); } } } @@ -1890,13 +1898,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size, int prev_intra_luma_pred_flag) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; int x_pu = x0 >> s->ps.sps->log2_min_pu_size; int y_pu = y0 >> s->ps.sps->log2_min_pu_size; int min_pu_width = s->ps.sps->min_pu_width; int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size; - int x0b = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1); - int y0b = y0 & ((1 << 
s->ps.sps->log2_ctb_size) - 1); + int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size); + int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size); int cand_up = (lc->ctb_up_flag || y0b) ? s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC; @@ -1960,15 +1968,7 @@ static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size, intra_pred_mode, size_in_pus); for (j = 0; j < size_in_pus; j++) { - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra = 1; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0] = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1] = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x = 0; - tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y = 0; + tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA; } } @@ -1988,10 +1988,14 @@ static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0, ct_depth, length); } +static const uint8_t tab_mode_idx[] = { + 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20, + 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31}; + static void intra_prediction_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 }; uint8_t prev_intra_luma_pred_flag[4]; int split = lc->cu.part_mode == PART_NxN; @@ -2017,14 +2021,42 @@ static void intra_prediction_unit(HEVCContext *s, int x0, int y0, } } - chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s); - if (chroma_mode != 4) { - if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode]) - lc->pu.intra_pred_mode_c = 34; - else - lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode]; - 
} else { - lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0]; + if (s->ps.sps->chroma_format_idc == 3) { + for (i = 0; i < side; i++) { + for (j = 0; j < side; j++) { + lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s); + if (chroma_mode != 4) { + if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode]) + lc->pu.intra_pred_mode_c[2 * i + j] = 34; + else + lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode]; + } else { + lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j]; + } + } + } + } else if (s->ps.sps->chroma_format_idc == 2) { + int mode_idx; + lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s); + if (chroma_mode != 4) { + if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode]) + mode_idx = 34; + else + mode_idx = intra_chroma_table[chroma_mode]; + } else { + mode_idx = lc->pu.intra_pred_mode[0]; + } + lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx]; + } else if (s->ps.sps->chroma_format_idc != 0) { + chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s); + if (chroma_mode != 4) { + if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode]) + lc->pu.intra_pred_mode_c[0] = 34; + else + lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode]; + } else { + lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0]; + } } } @@ -2032,7 +2064,7 @@ static void intra_prediction_unit_default_value(HEVCContext *s, int x0, int y0, int log2_cb_size) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; int pb_size = 1 << log2_cb_size; int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size; int min_pu_width = s->ps.sps->min_pu_width; @@ -2043,22 +2075,25 @@ static void intra_prediction_unit_default_value(HEVCContext *s, if (size_in_pus == 0) size_in_pus = 1; - for (j = 0; j < size_in_pus; j++) { + for (j = 0; j < size_in_pus; j++) memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, 
size_in_pus); - for (k = 0; k < size_in_pus; k++) - tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA; - } + if (lc->cu.pred_mode == MODE_INTRA) + for (j = 0; j < size_in_pus; j++) + for (k = 0; k < size_in_pus; k++) + tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA; } static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) { int cb_size = 1 << log2_cb_size; - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; int log2_min_cb_size = s->ps.sps->log2_min_cb_size; int length = cb_size >> log2_min_cb_size; int min_cb_width = s->ps.sps->min_cb_width; int x_cb = x0 >> log2_min_cb_size; int y_cb = y0 >> log2_min_cb_size; + int idx = log2_cb_size - 2; + int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1; int x, y, ret; lc->cu.x = x0; @@ -2086,10 +2121,16 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) x += min_cb_width; } lc->cu.pred_mode = skip_flag ? 
MODE_SKIP : MODE_INTER; + } else { + x = y_cb * min_cb_width + x_cb; + for (y = 0; y < length; y++) { + memset(&s->skip_flag[x], 0, length); + x += min_cb_width; + } } if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) { - hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0); + hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx); intra_prediction_unit_default_value(s, x0, y0, log2_cb_size); if (!s->sh.disable_deblocking_filter_flag) @@ -2127,37 +2168,37 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) intra_prediction_unit_default_value(s, x0, y0, log2_cb_size); switch (lc->cu.part_mode) { case PART_2Nx2N: - hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0); + hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx); break; case PART_2NxN: - hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0); - hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1); + hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx); + hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx); break; case PART_Nx2N: - hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0); - hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1); + hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1); + hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1); break; case PART_2NxnU: - hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0); - hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1); + hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx); + hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx); break; case PART_2NxnD: - hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0); - 
hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1); + hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx); + hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx); break; case PART_nLx2N: - hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0); - hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1); + hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2); + hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2); break; case PART_nRx2N: - hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0); - hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1); + hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2); + hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2); break; case PART_NxN: - hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0); - hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1); - hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2); - hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3); + hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1); + hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1); + hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1); + hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1); break; } } @@ -2170,12 +2211,13 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s); } if (rqt_root_cbf) { + 
const static int cbf[2] = { 0 }; lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ? s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag : s->ps.sps->max_transform_hierarchy_depth_inter; ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size, - log2_cb_size, 0, 0, 0, 0); + log2_cb_size, 0, 0, cbf, cbf); if (ret < 0) return ret; } else { @@ -2186,7 +2228,7 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) } if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0) - ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size); + ff_hevc_set_qPy(s, x0, y0, log2_cb_size); x = y_cb * min_cb_width + x_cb; for (y = 0; y < length; y++) { @@ -2194,7 +2236,12 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) x += min_cb_width; } - set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth); + if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 && + ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) { + lc->qPy_pred = lc->qp_y; + } + + set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth); return 0; } @@ -2202,11 +2249,12 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) static int hls_coding_quadtree(HEVCContext *s, int x0, int y0, int log2_cb_size, int cb_depth) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; const int cb_size = 1 << log2_cb_size; + int ret; int split_cu; - lc->ct.depth = cb_depth; + lc->ct_depth = cb_depth; if (x0 + cb_size <= s->ps.sps->width && y0 + cb_size <= s->ps.sps->height && log2_cb_size > s->ps.sps->log2_min_cb_size) { @@ -2220,31 +2268,64 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0, lc->tu.cu_qp_delta = 0; } + if (s->sh.cu_chroma_qp_offset_enabled_flag && + log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) { + lc->tu.is_cu_chroma_qp_offset_coded = 0; + } + if (split_cu) { + int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - 
s->ps.pps->diff_cu_qp_delta_depth)) - 1; const int cb_size_split = cb_size >> 1; const int x1 = x0 + cb_size_split; const int y1 = y0 + cb_size_split; - log2_cb_size--; - cb_depth++; + int more_data = 0; -#define SUBDIVIDE(x, y) \ -do { \ - if (x < s->ps.sps->width && y < s->ps.sps->height) { \ - int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\ - if (ret < 0) \ - return ret; \ - } \ -} while (0) + more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1); + if (more_data < 0) + return more_data; - SUBDIVIDE(x0, y0); - SUBDIVIDE(x1, y0); - SUBDIVIDE(x0, y1); - SUBDIVIDE(x1, y1); + if (more_data && x1 < s->ps.sps->width) { + more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1); + if (more_data < 0) + return more_data; + } + if (more_data && y1 < s->ps.sps->height) { + more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1); + if (more_data < 0) + return more_data; + } + if (more_data && x1 < s->ps.sps->width && + y1 < s->ps.sps->height) { + more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1); + if (more_data < 0) + return more_data; + } + + if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 && + ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) + lc->qPy_pred = lc->qp_y; + + if (more_data) + return ((x1 + cb_size_split) < s->ps.sps->width || + (y1 + cb_size_split) < s->ps.sps->height); + else + return 0; } else { - int ret = hls_coding_unit(s, x0, y0, log2_cb_size); + ret = hls_coding_unit(s, x0, y0, log2_cb_size); if (ret < 0) return ret; + if ((!((x0 + cb_size) % + (1 << (s->ps.sps->log2_ctb_size))) || + (x0 + cb_size >= s->ps.sps->width)) && + (!((y0 + cb_size) % + (1 << (s->ps.sps->log2_ctb_size))) || + (y0 + cb_size >= s->ps.sps->height))) { + int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s); + return !end_of_slice_flag; + } else { + return 1; + } } return 0; @@ -2253,7 +2334,7 @@ do { \ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, 
int ctb_addr_ts) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; int ctb_size = 1 << s->ps.sps->log2_ctb_size; int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts]; int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr; @@ -2267,7 +2348,6 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, } else if (s->ps.pps->tiles_enabled_flag) { if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) { int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size]; - lc->start_of_tiles_x = x_ctb; lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size); lc->first_qp_group = 1; } @@ -2288,7 +2368,7 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width]) lc->boundary_flags |= BOUNDARY_UPPER_SLICE; } else { - if (!ctb_addr_in_slice) + if (ctb_addr_in_slice <= 0) lc->boundary_flags |= BOUNDARY_LEFT_SLICE; if (ctb_addr_in_slice < s->ps.sps->ctb_width) lc->boundary_flags |= BOUNDARY_UPPER_SLICE; @@ -2300,8 +2380,9 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]])); } -static int hls_slice_data(HEVCContext *s) +static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) { + HEVCContext *s = avctxt->priv_data; int ctb_size = 1 << s->ps.sps->log2_ctb_size; int more_data = 1; int x_ctb = 0; @@ -2309,6 +2390,19 @@ static int hls_slice_data(HEVCContext *s) int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; int ret; + if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { + av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); + return 
AVERROR_INVALIDDATA; + } + + if (s->sh.dependent_slice_segment_flag) { + int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1]; + if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) { + av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n"); + return AVERROR_INVALIDDATA; + } + } + while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) { int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts]; @@ -2316,7 +2410,11 @@ static int hls_slice_data(HEVCContext *s) y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size; hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts); - ff_hevc_cabac_init(s, ctb_addr_ts); + ret = ff_hevc_cabac_init(s, ctb_addr_ts); + if (ret < 0) { + s->tab_slice_address[ctb_addr_rs] = -1; + return ret; + } hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size); @@ -2324,10 +2422,12 @@ static int hls_slice_data(HEVCContext *s) s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset; s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; - ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); - if (ret < 0) - return ret; - more_data = !ff_hevc_end_of_slice_flag_decode(s); + more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); + if (more_data < 0) { + s->tab_slice_address[ctb_addr_rs] = -1; + return more_data; + } + ctb_addr_ts++; ff_hevc_save_states(s, ctb_addr_ts); @@ -2336,36 +2436,199 @@ static int hls_slice_data(HEVCContext *s) if (x_ctb + ctb_size >= s->ps.sps->width && y_ctb + ctb_size >= s->ps.sps->height) - ff_hevc_hls_filter(s, x_ctb, y_ctb); + ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size); return ctb_addr_ts; } -static void restore_tqb_pixels(HEVCContext *s) +static int hls_slice_data(HEVCContext *s) +{ + int arg[2]; + int ret[2]; + + arg[0] = 0; + arg[1] = 1; + + s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int)); + return ret[0]; +} 
+static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id) { - int min_pu_size = 1 << s->ps.sps->log2_min_pu_size; - int x, y, c_idx; - - for (c_idx = 0; c_idx < 3; c_idx++) { - ptrdiff_t stride = s->frame->linesize[c_idx]; - int hshift = s->ps.sps->hshift[c_idx]; - int vshift = s->ps.sps->vshift[c_idx]; - for (y = 0; y < s->ps.sps->min_pu_height; y++) { - for (x = 0; x < s->ps.sps->min_pu_width; x++) { - if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) { - int n; - int len = min_pu_size >> hshift; - uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)]; - uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)]; - for (n = 0; n < (min_pu_size >> vshift); n++) { - memcpy(dst, src, len); - src += stride; - dst += stride; - } - } + HEVCContext *s1 = avctxt->priv_data, *s; + HEVCLocalContext *lc; + int ctb_size = 1<< s1->ps.sps->log2_ctb_size; + int more_data = 1; + int *ctb_row_p = input_ctb_row; + int ctb_row = ctb_row_p[job]; + int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size); + int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs]; + int thread = ctb_row % s1->threads_number; + int ret; + + s = s1->sList[self_id]; + lc = s->HEVClc; + + if(ctb_row) { + ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]); + if (ret < 0) + goto error; + ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]); + } + + while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) { + int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size; + int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size; + + hls_decode_neighbour(s, 
x_ctb, y_ctb, ctb_addr_ts); + + ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP); + + if (atomic_load(&s1->wpp_err)) { + ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP); + return 0; + } + + ret = ff_hevc_cabac_init(s, ctb_addr_ts); + if (ret < 0) + goto error; + hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size); + more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); + + if (more_data < 0) { + ret = more_data; + goto error; + } + + ctb_addr_ts++; + + ff_hevc_save_states(s, ctb_addr_ts); + ff_thread_report_progress2(s->avctx, ctb_row, thread, 1); + ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size); + + if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) { + atomic_store(&s1->wpp_err, 1); + ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP); + return 0; + } + + if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) { + ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size); + ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP); + return ctb_addr_ts; + } + ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts]; + x_ctb+=ctb_size; + + if(x_ctb >= s->ps.sps->width) { + break; + } + } + ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP); + + return 0; +error: + s->tab_slice_address[ctb_addr_rs] = -1; + atomic_store(&s1->wpp_err, 1); + ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP); + return ret; +} + +static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) +{ + const uint8_t *data = nal->data; + int length = nal->size; + HEVCLocalContext *lc = s->HEVClc; + int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int)); + int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int)); + int64_t offset; + int64_t startheader, cmpt = 0; + int i, j, res = 0; + + if (!ret || !arg) { + av_free(ret); + 
av_free(arg); + return AVERROR(ENOMEM); + } + + if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) { + av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n", + s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets, + s->ps.sps->ctb_width, s->ps.sps->ctb_height + ); + res = AVERROR_INVALIDDATA; + goto error; + } + + ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1); + + if (!s->sList[1]) { + for (i = 1; i < s->threads_number; i++) { + s->sList[i] = av_malloc(sizeof(HEVCContext)); + memcpy(s->sList[i], s, sizeof(HEVCContext)); + s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext)); + s->sList[i]->HEVClc = s->HEVClcList[i]; + } + } + + offset = (lc->gb.index >> 3); + + for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) { + if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) { + startheader--; + cmpt++; + } + } + + for (i = 1; i < s->sh.num_entry_point_offsets; i++) { + offset += (s->sh.entry_point_offset[i - 1] - cmpt); + for (j = 0, cmpt = 0, startheader = offset + + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) { + if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) { + startheader--; + cmpt++; } } + s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt; + s->sh.offset[i - 1] = offset; + + } + if (s->sh.num_entry_point_offsets != 0) { + offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt; + if (length < offset) { + av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n"); + res = AVERROR_INVALIDDATA; + goto error; + } + s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset; + s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset; + + } + s->data = data; + + for (i = 1; i < s->threads_number; i++) { + s->sList[i]->HEVClc->first_qp_group = 1; + s->sList[i]->HEVClc->qp_y 
= s->sList[0]->HEVClc->qp_y; + memcpy(s->sList[i], s, sizeof(HEVCContext)); + s->sList[i]->HEVClc = s->HEVClcList[i]; + } + + atomic_store(&s->wpp_err, 0); + ff_reset_entries(s->avctx); + + for (i = 0; i <= s->sh.num_entry_point_offsets; i++) { + arg[i] = i; + ret[i] = 0; } + + if (s->ps.pps->entropy_coding_sync_enabled_flag) + s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1); + + for (i = 0; i <= s->sh.num_entry_point_offsets; i++) + res += ret[i]; +error: + av_free(ret); + av_free(arg); + return res; } static int set_side_data(HEVCContext *s) @@ -2423,6 +2686,86 @@ static int set_side_data(HEVCContext *s) s->sei.display_orientation.vflip); } + // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1 + // so the side data persists for the entire coded video sequence. + if (s->sei.mastering_display.present > 0 && + IS_IRAP(s) && s->no_rasl_output_flag) { + s->sei.mastering_display.present--; + } + if (s->sei.mastering_display.present) { + // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b + const int mapping[3] = {2, 0, 1}; + const int chroma_den = 50000; + const int luma_den = 10000; + int i; + AVMasteringDisplayMetadata *metadata = + av_mastering_display_metadata_create_side_data(out); + if (!metadata) + return AVERROR(ENOMEM); + + for (i = 0; i < 3; i++) { + const int j = mapping[i]; + metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0]; + metadata->display_primaries[i][0].den = chroma_den; + metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1]; + metadata->display_primaries[i][1].den = chroma_den; + } + metadata->white_point[0].num = s->sei.mastering_display.white_point[0]; + metadata->white_point[0].den = chroma_den; + metadata->white_point[1].num = s->sei.mastering_display.white_point[1]; + metadata->white_point[1].den = chroma_den; + + metadata->max_luminance.num = 
s->sei.mastering_display.max_luminance; + metadata->max_luminance.den = luma_den; + metadata->min_luminance.num = s->sei.mastering_display.min_luminance; + metadata->min_luminance.den = luma_den; + metadata->has_luminance = 1; + metadata->has_primaries = 1; + + av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n"); + av_log(s->avctx, AV_LOG_DEBUG, + "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n", + av_q2d(metadata->display_primaries[0][0]), + av_q2d(metadata->display_primaries[0][1]), + av_q2d(metadata->display_primaries[1][0]), + av_q2d(metadata->display_primaries[1][1]), + av_q2d(metadata->display_primaries[2][0]), + av_q2d(metadata->display_primaries[2][1]), + av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1])); + av_log(s->avctx, AV_LOG_DEBUG, + "min_luminance=%f, max_luminance=%f\n", + av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance)); + } + // Decrement the content light level flag when IRAP frame has no_rasl_output_flag=1 + // so the side data persists for the entire coded video sequence. 
+ if (s->sei.content_light.present > 0 && + IS_IRAP(s) && s->no_rasl_output_flag) { + s->sei.content_light.present--; + } + if (s->sei.content_light.present) { + AVContentLightMetadata *metadata = + av_content_light_metadata_create_side_data(out); + if (!metadata) + return AVERROR(ENOMEM); + metadata->MaxCLL = s->sei.content_light.max_content_light_level; + metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level; + + av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n"); + av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n", + metadata->MaxCLL, metadata->MaxFALL); + } + + if (s->sei.a53_caption.a53_caption) { + AVFrameSideData* sd = av_frame_new_side_data(out, + AV_FRAME_DATA_A53_CC, + s->sei.a53_caption.a53_caption_size); + if (sd) + memcpy(sd->data, s->sei.a53_caption.a53_caption, s->sei.a53_caption.a53_caption_size); + av_freep(&s->sei.a53_caption.a53_caption); + s->sei.a53_caption.a53_caption_size = 0; + s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS; + } + if (s->sei.alternative_transfer.present && av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) && s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) { @@ -2434,23 +2777,26 @@ static int set_side_data(HEVCContext *s) static int hevc_frame_start(HEVCContext *s) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; + int pic_size_in_ctb = ((s->ps.sps->width >> s->ps.sps->log2_min_cb_size) + 1) * + ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1); int ret; - memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1)); - memset(s->vertical_bs, 0, 2 * s->bs_width * (s->bs_height + 1)); + memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); + memset(s->vertical_bs, 0, s->bs_width * s->bs_height); memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); - memset(s->is_pcm, 0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height); + memset(s->is_pcm, 
0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); + memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); - lc->start_of_tiles_x = 0; s->is_decoded = 0; s->first_nal_type = s->nal_unit_type; + s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos); + if (s->ps.pps->tiles_enabled_flag) lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size; - ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame, - s->poc); + ret = ff_hevc_set_new_ref(s, &s->frame, s->poc); if (ret < 0) goto fail; @@ -2466,12 +2812,18 @@ static int hevc_frame_start(HEVCContext *s) if (ret < 0) goto fail; + s->frame->pict_type = 3 - s->sh.slice_type; + + if (!IS_IRAP(s)) + ff_hevc_bump_frame(s); + av_frame_unref(s->output_frame); ret = ff_hevc_output_frame(s, s->output_frame, 0); if (ret < 0) goto fail; - ff_thread_finish_setup(s->avctx); + if (!s->avctx->hwaccel) + ff_thread_finish_setup(s->avctx); return 0; @@ -2484,7 +2836,7 @@ fail: static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) { - HEVCLocalContext *lc = &s->HEVClc; + HEVCLocalContext *lc = s->HEVClc; GetBitContext *gb = &lc->gb; int ctb_addr_ts, ret; @@ -2494,25 +2846,56 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) switch (s->nal_unit_type) { case HEVC_NAL_VPS: + if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) { + ret = s->avctx->hwaccel->decode_params(s->avctx, + nal->type, + nal->raw_data, + nal->raw_size); + if (ret < 0) + goto fail; + } ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps); if (ret < 0) goto fail; break; case HEVC_NAL_SPS: + if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) { + ret = s->avctx->hwaccel->decode_params(s->avctx, + nal->type, + nal->raw_data, + nal->raw_size); + if (ret < 0) + goto fail; + } ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps, s->apply_defdispwin); if (ret < 0) goto fail; break; case HEVC_NAL_PPS: + 
if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) { + ret = s->avctx->hwaccel->decode_params(s->avctx, + nal->type, + nal->raw_data, + nal->raw_size); + if (ret < 0) + goto fail; + } ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps); if (ret < 0) goto fail; break; case HEVC_NAL_SEI_PREFIX: case HEVC_NAL_SEI_SUFFIX: - ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, - s->nal_unit_type); + if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) { + ret = s->avctx->hwaccel->decode_params(s->avctx, + nal->type, + nal->raw_data, + nal->raw_size); + if (ret < 0) + goto fail; + } + ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type); if (ret < 0) goto fail; break; @@ -2536,25 +2919,33 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) if (ret < 0) return ret; - if (s->max_ra == INT_MAX) { - if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) { - s->max_ra = s->poc; - } else { - if (IS_IDR(s)) - s->max_ra = INT_MIN; - } - } - - if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) && - s->poc <= s->max_ra) { - s->is_decoded = 0; + if ( + (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) || + (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) || + (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) { break; - } else { - if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra) - s->max_ra = INT_MIN; } if (s->sh.first_slice_in_pic_flag) { + if (s->max_ra == INT_MAX) { + if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) { + s->max_ra = s->poc; + } else { + if (IS_IDR(s)) + s->max_ra = INT_MIN; + } + } + + if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) && + s->poc <= s->max_ra) { + s->is_decoded = 0; + break; + } else { + if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra) + s->max_ra = INT_MIN; + } + + s->overlap ++; ret = hevc_frame_start(s); if (ret < 0) return ret; @@ 
-2591,13 +2982,12 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) if (ret < 0) goto fail; } else { - ctb_addr_ts = hls_slice_data(s); + if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0) + ctb_addr_ts = hls_slice_data_wpp(s, nal); + else + ctb_addr_ts = hls_slice_data(s); if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) { s->is_decoded = 1; - if ((s->ps.pps->transquant_bypass_enable_flag || - (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) && - s->ps.sps->sao_enabled) - restore_tqb_pixels(s); } if (ctb_addr_ts < 0) { @@ -2629,14 +3019,17 @@ fail: static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length) { int i, ret = 0; + int eos_at_start = 1; s->ref = NULL; + s->last_eos = s->eos; s->eos = 0; + s->overlap = 0; /* split the input packet into NAL units, so we know the upper bound on the * number of slices in the frame */ ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff, - s->nal_length_size, s->avctx->codec_id); + s->nal_length_size, s->avctx->codec_id, 1, 0); if (ret < 0) { av_log(s->avctx, AV_LOG_ERROR, "Error splitting the input into NAL units.\n"); @@ -2645,13 +3038,29 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length) for (i = 0; i < s->pkt.nb_nals; i++) { if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT || - s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) - s->eos = 1; + s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) { + if (eos_at_start) { + s->last_eos = 1; + } else { + s->eos = 1; + } + } else { + eos_at_start = 0; + } } /* decode the NAL units */ for (i = 0; i < s->pkt.nb_nals; i++) { - ret = decode_nal_unit(s, &s->pkt.nals[i]); + H2645NAL *nal = &s->pkt.nals[i]; + + if (s->avctx->skip_frame >= AVDISCARD_ALL || + (s->avctx->skip_frame >= AVDISCARD_NONREF + && ff_hevc_nal_is_nonref(nal->type))) + continue; + + ret = decode_nal_unit(s, nal); + if (ret >= 0 && s->overlap > 2) + ret = AVERROR_INVALIDDATA; if (ret < 0) { 
av_log(s->avctx, AV_LOG_WARNING, "Error parsing NAL unit #%d.\n", i); @@ -2660,7 +3069,7 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length) } fail: - if (s->ref) + if (s->ref && s->threads_type == FF_THREAD_FRAME) ff_thread_report_progress(&s->ref->tf, INT_MAX, 0); return ret; @@ -2739,69 +3148,19 @@ static int verify_md5(HEVCContext *s, AVFrame *frame) return 0; } -static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length) +static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first) { - AVCodecContext *avctx = s->avctx; - GetByteContext gb; int ret, i; - bytestream2_init(&gb, buf, length); - - if (length > 3 && (buf[0] || buf[1] || buf[2] > 1)) { - /* It seems the extradata is encoded as hvcC format. - * Temporarily, we support configurationVersion==0 until 14496-15 3rd - * is finalized. When finalized, configurationVersion will be 1 and we - * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */ - int i, j, num_arrays, nal_len_size; - - s->is_nalff = 1; - - bytestream2_skip(&gb, 21); - nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1; - num_arrays = bytestream2_get_byte(&gb); - - /* nal units in the hvcC always have length coded with 2 bytes, - * so put a fake nal_length_size = 2 while parsing them */ - s->nal_length_size = 2; - - /* Decode nal units from hvcC. 
*/ - for (i = 0; i < num_arrays; i++) { - int type = bytestream2_get_byte(&gb) & 0x3f; - int cnt = bytestream2_get_be16(&gb); - - for (j = 0; j < cnt; j++) { - // +2 for the nal size field - int nalsize = bytestream2_peek_be16(&gb) + 2; - if (bytestream2_get_bytes_left(&gb) < nalsize) { - av_log(s->avctx, AV_LOG_ERROR, - "Invalid NAL unit size in extradata.\n"); - return AVERROR_INVALIDDATA; - } - - ret = decode_nal_units(s, gb.buffer, nalsize); - if (ret < 0) { - av_log(avctx, AV_LOG_ERROR, - "Decoding nal unit %d %d from hvcC failed\n", - type, i); - return ret; - } - bytestream2_skip(&gb, nalsize); - } - } - - /* Now store right nal length size, that will be used to parse - * all other nals */ - s->nal_length_size = nal_len_size; - } else { - s->is_nalff = 0; - ret = decode_nal_units(s, buf, length); - if (ret < 0) - return ret; - } + ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff, + &s->nal_length_size, s->avctx->err_recognition, + s->apply_defdispwin, s->avctx); + if (ret < 0) + return ret; /* export stream parameters from the first SPS */ for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) { - if (s->ps.sps_list[i]) { + if (first && s->ps.sps_list[i]) { const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data; export_stream_params(s->avctx, &s->ps, sps); break; @@ -2831,7 +3190,7 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output, new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, &new_extradata_size); if (new_extradata && new_extradata_size > 0) { - ret = hevc_decode_extradata(s, new_extradata, new_extradata_size); + ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0); if (ret < 0) return ret; } @@ -2842,9 +3201,12 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output, return ret; if (avctx->hwaccel) { - if (s->ref && avctx->hwaccel->end_frame(avctx) < 0) + if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) { 
av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n"); + ff_hevc_unref_frame(s, s->ref, ~0); + return ret; + } } else { /* verify the SEI checksum */ if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded && @@ -2873,7 +3235,9 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output, static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src) { - int ret = ff_thread_ref_frame(&dst->tf, &src->tf); + int ret; + + ret = ff_thread_ref_frame(&dst->tf, &src->tf); if (ret < 0) return ret; @@ -2918,7 +3282,12 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) av_freep(&s->md5_ctx); - av_frame_free(&s->tmp_frame); + av_freep(&s->cabac_state); + + for (i = 0; i < 3; i++) { + av_freep(&s->sao_pixel_buffer_h[i]); + av_freep(&s->sao_pixel_buffer_v[i]); + } av_frame_free(&s->output_frame); for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) { @@ -2926,12 +3295,22 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) av_frame_free(&s->DPB[i].frame); } - for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) - av_buffer_unref(&s->ps.vps_list[i]); - for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) - av_buffer_unref(&s->ps.sps_list[i]); - for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) - av_buffer_unref(&s->ps.pps_list[i]); + ff_hevc_ps_uninit(&s->ps); + + av_freep(&s->sh.entry_point_offset); + av_freep(&s->sh.offset); + av_freep(&s->sh.size); + + for (i = 1; i < s->threads_number; i++) { + HEVCLocalContext *lc = s->HEVClcList[i]; + if (lc) { + av_freep(&s->HEVClcList[i]); + av_freep(&s->sList[i]); + } + } + if (s->HEVClc == s->HEVClcList[0]) + s->HEVClc = NULL; + av_freep(&s->HEVClcList[0]); ff_h2645_packet_uninit(&s->pkt); @@ -2945,8 +3324,14 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) s->avctx = avctx; - s->tmp_frame = av_frame_alloc(); - if (!s->tmp_frame) + s->HEVClc = av_mallocz(sizeof(HEVCLocalContext)); + if (!s->HEVClc) + goto fail; + s->HEVClcList[0] = s->HEVClc; + 
s->sList[0] = s; + + s->cabac_state = av_malloc(HEVC_CONTEXTS); + if (!s->cabac_state) goto fail; s->output_frame = av_frame_alloc(); @@ -2969,6 +3354,9 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ff_bswapdsp_init(&s->bdsp); s->context_initialized = 1; + s->eos = 0; + + ff_hevc_reset_sei(&s->sei); return 0; @@ -2977,6 +3365,7 @@ fail: return AVERROR(ENOMEM); } +#if HAVE_THREADS static int hevc_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) { @@ -2999,6 +3388,8 @@ static int hevc_update_thread_context(AVCodecContext *dst, } } + if (s->ps.sps != s0->ps.sps) + s->ps.sps = NULL; for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) { av_buffer_unref(&s->ps.vps_list[i]); if (s0->ps.vps_list[i]) { @@ -3027,23 +3418,36 @@ static int hevc_update_thread_context(AVCodecContext *dst, } if (s->ps.sps != s0->ps.sps) - ret = set_sps(s, s0->ps.sps, src->pix_fmt); + if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0) + return ret; s->seq_decode = s0->seq_decode; s->seq_output = s0->seq_output; s->pocTid0 = s0->pocTid0; s->max_ra = s0->max_ra; + s->eos = s0->eos; + s->no_rasl_output_flag = s0->no_rasl_output_flag; s->is_nalff = s0->is_nalff; s->nal_length_size = s0->nal_length_size; + s->threads_number = s0->threads_number; + s->threads_type = s0->threads_type; + if (s0->eos) { s->seq_decode = (s->seq_decode + 1) & 0xff; s->max_ra = INT_MAX; } + s->sei.frame_packing = s0->sei.frame_packing; + s->sei.display_orientation = s0->sei.display_orientation; + s->sei.mastering_display = s0->sei.mastering_display; + s->sei.content_light = s0->sei.content_light; + s->sei.alternative_transfer = s0->sei.alternative_transfer; + return 0; } +#endif static av_cold int hevc_decode_init(AVCodecContext *avctx) { @@ -3056,17 +3460,34 @@ static av_cold int hevc_decode_init(AVCodecContext *avctx) if (ret < 0) return ret; + s->enable_parallel_tiles = 0; + s->sei.picture_timing.picture_struct = 0; + s->eos = 1; + + atomic_init(&s->wpp_err, 0); + + 
if(avctx->active_thread_type & FF_THREAD_SLICE) + s->threads_number = avctx->thread_count; + else + s->threads_number = 1; + if (avctx->extradata_size > 0 && avctx->extradata) { - ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size); + ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1); if (ret < 0) { hevc_decode_free(avctx); return ret; } } + if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1) + s->threads_type = FF_THREAD_FRAME; + else + s->threads_type = FF_THREAD_SLICE; + return 0; } +#if HAVE_THREADS static av_cold int hevc_init_thread_copy(AVCodecContext *avctx) { HEVCContext *s = avctx->priv_data; @@ -3080,12 +3501,14 @@ static av_cold int hevc_init_thread_copy(AVCodecContext *avctx) return 0; } +#endif static void hevc_decode_flush(AVCodecContext *avctx) { HEVCContext *s = avctx->priv_data; ff_hevc_flush_dpb(s); s->max_ra = INT_MAX; + s->eos = 1; } #define OFFSET(x) offsetof(HEVCContext, x) @@ -3093,7 +3516,9 @@ static void hevc_decode_flush(AVCodecContext *avctx) static const AVOption options[] = { { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin), - AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR }, + AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR }, + { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin), + AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR }, { NULL }, }; @@ -3115,16 +3540,13 @@ AVCodec ff_hevc_decoder = { .close = hevc_decode_free, .decode = hevc_decode_frame, .flush = hevc_decode_flush, - .update_thread_context = hevc_update_thread_context, - .init_thread_copy = hevc_init_thread_copy, + .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context), + .init_thread_copy = ONLY_IF_THREADS_ENABLED(hevc_init_thread_copy), .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | - AV_CODEC_CAP_FRAME_THREADS, + AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS, + .caps_internal = 
FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING, .profiles = NULL_IF_CONFIG_SMALL(ff_hevc_profiles), - .caps_internal = FF_CODEC_CAP_EXPORTS_CROPPING | FF_CODEC_CAP_INIT_THREADSAFE, .hw_configs = (const AVCodecHWConfigInternal*[]) { -#if CONFIG_HEVC_CUVID_HWACCEL - HWACCEL_CUVID(hevc), -#endif #if CONFIG_HEVC_DXVA2_HWACCEL HWACCEL_DXVA2(hevc), #endif @@ -3134,12 +3556,18 @@ AVCodec ff_hevc_decoder = { #if CONFIG_HEVC_D3D11VA2_HWACCEL HWACCEL_D3D11VA2(hevc), #endif +#if CONFIG_HEVC_NVDEC_HWACCEL + HWACCEL_NVDEC(hevc), +#endif #if CONFIG_HEVC_VAAPI_HWACCEL HWACCEL_VAAPI(hevc), #endif #if CONFIG_HEVC_VDPAU_HWACCEL HWACCEL_VDPAU(hevc), #endif +#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + HWACCEL_VIDEOTOOLBOX(hevc), +#endif NULL }, }; |