From cb668476ab1343d27e03edc0b32f57ca7a187471 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sun, 9 Oct 2011 20:18:34 +0100 Subject: motion_est: make MotionExtContext.map_generation unsigned The way this value is used, it should be an unsigned type. While the numerical value has no meaning, unsigned wraparound is relied upon. Signed-off-by: Mans Rullgard --- libavcodec/motion_est.c | 2 +- libavcodec/motion_est_template.c | 36 ++++++++++++++++++------------------ libavcodec/mpegvideo.h | 2 +- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c index d0f9367..c4512a9 100644 --- a/libavcodec/motion_est.c +++ b/libavcodec/motion_est.c @@ -52,7 +52,7 @@ static inline int sad_hpel_motion_search(MpegEncContext * s, int src_index, int ref_index, int size, int h); -static inline int update_map_generation(MotionEstContext *c) +static inline unsigned update_map_generation(MotionEstContext *c) { c->map_generation+= 1<<(ME_MAP_MV_BITS*2); if(c->map_generation==0){ diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c index 88e23ae..d0d4b41 100644 --- a/libavcodec/motion_est_template.c +++ b/libavcodec/motion_est_template.c @@ -89,8 +89,8 @@ static int hpel_motion_search(MpegEncContext * s, const int b= score_map[(index+(1<penalty_factor; - int key; - int map_generation= c->map_generation; + unsigned key; + unsigned map_generation= c->map_generation; #ifndef NDEBUG uint32_t *map= c->map; #endif @@ -208,7 +208,7 @@ static int qpel_motion_search(MpegEncContext * s, const int mx = *mx_ptr; const int my = *my_ptr; const int penalty_factor= c->sub_penalty_factor; - const int map_generation= c->map_generation; + const unsigned map_generation = c->map_generation; const int subpel_quality= c->avctx->me_subpel_quality; uint32_t *map= c->map; me_cmp_func cmpf, chroma_cmpf; @@ -354,7 +354,7 @@ static int qpel_motion_search(MpegEncContext * s, #define CHECK_MV(x,y)\ {\ - const int key= ((y)<= xmin);\ assert((x) <= xmax);\ @@ -382,7 +382,7 @@ static int qpel_motion_search(MpegEncContext * s, #define CHECK_MV_DIR(x,y,new_dir)\ {\ - const int key= ((y)<map_generation; + unsigned map_generation = c->map_generation; cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; { /* ensure that the best point is in the MAP as h/qpel refinement needs it */ - const int key= (best[1]<map_generation; + unsigned map_generation = c->map_generation; cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; @@ -503,7 +503,7 @@ static int hex_search(MpegEncContext * s, int *best, int dmin, me_cmp_func cmpf, chroma_cmpf; LOAD_COMMON LOAD_COMMON2 - int map_generation= c->map_generation; + unsigned map_generation = c->map_generation; int x,y,d; const int dec= dia_size & (dia_size-1); @@ -537,7 +537,7 @@ static int l2s_dia_search(MpegEncContext * s, int *best, int dmin, me_cmp_func cmpf, chroma_cmpf; LOAD_COMMON LOAD_COMMON2 - int map_generation= c->map_generation; + unsigned map_generation = c->map_generation; int x,y,i,d; int dia_size= c->dia_size&0xFF; const int dec= dia_size & (dia_size-1); @@ -575,7 +575,7 @@ static int umh_search(MpegEncContext * s, int *best, int dmin, me_cmp_func cmpf, chroma_cmpf; LOAD_COMMON LOAD_COMMON2 - int map_generation= c->map_generation; + unsigned map_generation = c->map_generation; int x,y,x2,y2, i, j, d; const int dia_size= c->dia_size&0xFE; static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2}, @@ -622,7 +622,7 @@ static int full_search(MpegEncContext * s, int *best, int dmin, me_cmp_func cmpf, chroma_cmpf; LOAD_COMMON LOAD_COMMON2 - int map_generation= c->map_generation; + unsigned map_generation = c->map_generation; int x,y, d; const int dia_size= c->dia_size&0xFF; @@ -651,7 +651,7 @@ static int full_search(MpegEncContext * s, int *best, int dmin, #define SAB_CHECK_MV(ax,ay)\ {\ - const int key= ((ay)<map_generation; + unsigned map_generation = c->map_generation; cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; @@ -775,7 +775,7 @@ static int var_diamond_search(MpegEncContext * s, int *best, int dmin, int dia_size; LOAD_COMMON LOAD_COMMON2 - int map_generation= c->map_generation; + unsigned map_generation = c->map_generation; cmpf= s->dsp.me_cmp[size]; chroma_cmpf= s->dsp.me_cmp[size+1]; @@ -867,7 +867,7 @@ static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int int d; ///< the score (cmp + penalty) of any given mv int dmin; /**< the best value of d, i.e. the score corresponding to the mv stored in best[]. */ - int map_generation; + unsigned map_generation; int penalty_factor; const int ref_mv_stride= s->mb_stride; //pass as arg FIXME const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME @@ -995,7 +995,7 @@ static int epzs_motion_search4(MpegEncContext * s, MotionEstContext * const c= &s->me; int best[2]={0, 0}; int d, dmin; - int map_generation; + unsigned map_generation; const int penalty_factor= c->penalty_factor; const int size=1; const int h=8; @@ -1055,7 +1055,7 @@ static int epzs_motion_search2(MpegEncContext * s, MotionEstContext * const c= &s->me; int best[2]={0, 0}; int d, dmin; - int map_generation; + unsigned map_generation; const int penalty_factor= c->penalty_factor; const int size=0; //FIXME pass as arg const int h=8; diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index d0f4bfd..7de7217 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -154,7 +154,7 @@ typedef struct MotionEstContext{ int best_bits; uint32_t *map; ///< map to avoid duplicate evaluations uint32_t *score_map; ///< map to store the scores - int map_generation; + unsigned map_generation; int pre_penalty_factor; int penalty_factor; /**< an estimate of the bits required to code a given mv value, e.g. (1,0) takes -- cgit v1.1 From c79d2a20bad59298188171f1316a830d563a41ee Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 10 Oct 2011 20:41:31 +0100 Subject: sipr: fix get_bits(0) calls Zero-length get_bits() is undefined, must check before calling. Signed-off-by: Mans Rullgard --- libavcodec/sipr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libavcodec/sipr.c b/libavcodec/sipr.c index f57ec4f..08dd63a 100644 --- a/libavcodec/sipr.c +++ b/libavcodec/sipr.c @@ -194,14 +194,16 @@ static void decode_parameters(SiprParameters* parms, GetBitContext *pgb, { int i, j; - parms->ma_pred_switch = get_bits(pgb, p->ma_predictor_bits); + if (p->ma_predictor_bits) + parms->ma_pred_switch = get_bits(pgb, p->ma_predictor_bits); for (i = 0; i < 5; i++) parms->vq_indexes[i] = get_bits(pgb, p->vq_indexes_bits[i]); for (i = 0; i < p->subframe_count; i++) { parms->pitch_delay[i] = get_bits(pgb, p->pitch_delay_bits[i]); - parms->gp_index[i] = get_bits(pgb, p->gp_index_bits); + if (p->gp_index_bits) + parms->gp_index[i] = get_bits(pgb, p->gp_index_bits); for (j = 0; j < p->number_of_fc_indexes; j++) parms->fc_indexes[i][j] = get_bits(pgb, p->fc_index_bits[j]); -- cgit v1.1 From be1242a3f2b28e9cb08515bdc1db6c14403c279a Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 11 Oct 2011 00:58:03 +0100 Subject: h264: fix detection of optional trailing PPS elements The PPS may contain a few trailing elements whose presence is only signalled by data remaining after the the mandatory part has been parsed. The current code fails to take into account the rbsp_trailing_bits() when deciding whether to parse these optional elements. Assuming no unnecessary padding bytes are passed to this function, the optional elements are present if either more than 8 extra bits remain or the remaining bits do not form a valid rbsp_trailing_bits() after the mandatory PPS elements have been parsed. Signed-off-by: Mans Rullgard --- libavcodec/h264_ps.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index 7491807..bb673e9 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -462,6 +462,7 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){ unsigned int pps_id= get_ue_golomb(&s->gb); PPS *pps; const int qp_bd_offset = 6*(h->sps.bit_depth_luma-8); + int bits_left; if(pps_id >= MAX_PPS_COUNT) { av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id); @@ -538,7 +539,9 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){ memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4)); memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8)); - if(get_bits_count(&s->gb) < bit_length){ + bits_left = bit_length - get_bits_count(&s->gb); + if (bits_left && (bits_left > 8 || + show_bits(&s->gb, bits_left) != 1 << (bits_left - 1))) { pps->transform_8x8_mode= get_bits1(&s->gb); decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8); pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset -- cgit v1.1 From a7984a6a6dbdfbd95df4a669a452ddf34c485cab Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 21 Sep 2011 11:34:26 -0400 Subject: smacker: Separate audio flags from sample rates in smacker demuxer. Makes the code easier to understand. --- libavformat/smacker.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/libavformat/smacker.c b/libavformat/smacker.c index 995ef41..447e6e6 100644 --- a/libavformat/smacker.c +++ b/libavformat/smacker.c @@ -31,11 +31,11 @@ #define SMACKER_FLAG_RING_FRAME 0x01 enum SAudFlags { - SMK_AUD_PACKED = 0x80000000, - SMK_AUD_16BITS = 0x20000000, - SMK_AUD_STEREO = 0x10000000, - SMK_AUD_BINKAUD = 0x08000000, - SMK_AUD_USEDCT = 0x04000000 + SMK_AUD_PACKED = 0x80, + SMK_AUD_16BITS = 0x20, + SMK_AUD_STEREO = 0x10, + SMK_AUD_BINKAUD = 0x08, + SMK_AUD_USEDCT = 0x04 }; typedef struct SmackerContext { @@ -48,6 +48,7 @@ typedef struct SmackerContext { uint32_t audio[7]; uint32_t treesize; uint32_t mmap_size, mclr_size, full_size, type_size; + uint8_t aflags[7]; uint32_t rates[7]; uint32_t pad; /* frame info */ @@ -129,8 +130,10 @@ static int smacker_read_header(AVFormatContext *s, AVFormatParameters *ap) smk->mclr_size = avio_rl32(pb); smk->full_size = avio_rl32(pb); smk->type_size = avio_rl32(pb); - for(i = 0; i < 7; i++) - smk->rates[i] = avio_rl32(pb); + for(i = 0; i < 7; i++) { + smk->rates[i] = avio_rl24(pb); + smk->aflags[i] = avio_r8(pb); + } smk->pad = avio_rl32(pb); /* setup data */ if(smk->frames > 0xFFFFFF) { @@ -173,23 +176,23 @@ static int smacker_read_header(AVFormatContext *s, AVFormatParameters *ap) /* handle possible audio streams */ for(i = 0; i < 7; i++) { smk->indexes[i] = -1; - if(smk->rates[i] & 0xFFFFFF){ + if (smk->rates[i]) { ast[i] = av_new_stream(s, 0); smk->indexes[i] = ast[i]->index; ast[i]->codec->codec_type = AVMEDIA_TYPE_AUDIO; - if (smk->rates[i] & SMK_AUD_BINKAUD) { + if (smk->aflags[i] & SMK_AUD_BINKAUD) { ast[i]->codec->codec_id = CODEC_ID_BINKAUDIO_RDFT; - } else if (smk->rates[i] & SMK_AUD_USEDCT) { + } else if (smk->aflags[i] & SMK_AUD_USEDCT) { ast[i]->codec->codec_id = CODEC_ID_BINKAUDIO_DCT; - } else if (smk->rates[i] & SMK_AUD_PACKED){ + } else if (smk->aflags[i] & SMK_AUD_PACKED){ ast[i]->codec->codec_id = CODEC_ID_SMACKAUDIO; ast[i]->codec->codec_tag = MKTAG('S', 'M', 'K', 'A'); } else { ast[i]->codec->codec_id = CODEC_ID_PCM_U8; } - ast[i]->codec->channels = (smk->rates[i] & SMK_AUD_STEREO) ? 2 : 1; - ast[i]->codec->sample_rate = smk->rates[i] & 0xFFFFFF; - ast[i]->codec->bits_per_coded_sample = (smk->rates[i] & SMK_AUD_16BITS) ? 16 : 8; + ast[i]->codec->channels = (smk->aflags[i] & SMK_AUD_STEREO) ? 2 : 1; + ast[i]->codec->sample_rate = smk->rates[i]; + ast[i]->codec->bits_per_coded_sample = (smk->aflags[i] & SMK_AUD_16BITS) ? 16 : 8; if(ast[i]->codec->bits_per_coded_sample == 16 && ast[i]->codec->codec_id == CODEC_ID_PCM_U8) ast[i]->codec->codec_id = CODEC_ID_PCM_S16LE; av_set_pts_info(ast[i], 64, 1, ast[i]->codec->sample_rate -- cgit v1.1 From e190e453bd1e4d4b409ed3556b3a50d1087c15d7 Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 21 Sep 2011 11:37:51 -0400 Subject: smacker: validate number of channels --- libavcodec/smacker.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c index 496bbb5..f3dec7f 100644 --- a/libavcodec/smacker.c +++ b/libavcodec/smacker.c @@ -559,6 +559,10 @@ static av_cold int decode_end(AVCodecContext *avctx) static av_cold int smka_decode_init(AVCodecContext *avctx) { + if (avctx->channels < 1 || avctx->channels > 2) { + av_log(avctx, AV_LOG_ERROR, "invalid number of channels\n"); + return AVERROR(EINVAL); + } avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO; avctx->sample_fmt = avctx->bits_per_coded_sample == 8 ? AV_SAMPLE_FMT_U8 : AV_SAMPLE_FMT_S16; return 0; -- cgit v1.1 From cf044f8bff0d28dbc34492f18b0d18b3ba8bad9d Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 21 Sep 2011 11:42:55 -0400 Subject: smacker: check buffer size before reading output size --- libavcodec/smacker.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c index f3dec7f..20ddc15 100644 --- a/libavcodec/smacker.c +++ b/libavcodec/smacker.c @@ -586,6 +586,11 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data, int *data_size, int bits, stereo; int pred[2] = {0, 0}; + if (buf_size <= 4) { + av_log(avctx, AV_LOG_ERROR, "packet is too small\n"); + return AVERROR(EINVAL); + } + unp_size = AV_RL32(buf); init_get_bits(&gb, buf + 4, (buf_size - 4) * 8); -- cgit v1.1 From ff1f89de2da3472d133e2c95bf7c9ad2d88df33d Mon Sep 17 00:00:00 2001 From: Justin Ruggles Date: Wed, 21 Sep 2011 11:49:33 -0400 Subject: smacker: validate channels and sample format. --- libavcodec/smacker.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c index 20ddc15..d8a3d77 100644 --- a/libavcodec/smacker.c +++ b/libavcodec/smacker.c @@ -606,6 +606,14 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data, int *data_size, av_log(avctx, AV_LOG_ERROR, "Frame is too large to fit in buffer\n"); return -1; } + if (stereo ^ (avctx->channels != 1)) { + av_log(avctx, AV_LOG_ERROR, "channels mismatch\n"); + return AVERROR(EINVAL); + } + if (bits && avctx->sample_fmt == AV_SAMPLE_FMT_U8) { + av_log(avctx, AV_LOG_ERROR, "sample format mismatch\n"); + return AVERROR(EINVAL); + } memset(vlc, 0, sizeof(VLC) * 4); memset(h, 0, sizeof(HuffContext) * 4); -- cgit v1.1 From f9a9c8f9bc88cd34d2393311055596e649e868fb Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 28 Sep 2011 14:47:31 +0200 Subject: flashsv: fix typo in av_log() message --- libavcodec/flashsv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c index 57d33c0..b1424a5 100644 --- a/libavcodec/flashsv.c +++ b/libavcodec/flashsv.c @@ -300,7 +300,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data, /* check for changes of image width and image height */ if (avctx->width != s->image_width || avctx->height != s->image_height) { av_log(avctx, AV_LOG_ERROR, - "Frame width or height differs from first frames!\n"); + "Frame width or height differs from first frame!\n"); av_log(avctx, AV_LOG_ERROR, "fh = %d, fv %d vs ch = %d, cv = %d\n", avctx->height, avctx->width, s->image_height, s->image_width); return AVERROR_INVALIDDATA; -- cgit v1.1 From 4b7f49082f80cf92475c999f8c42e112a13aed6a Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Wed, 28 Sep 2011 14:47:58 +0200 Subject: flashsv: return more meaningful error value --- libavcodec/flashsv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c index b1424a5..c99c21c 100644 --- a/libavcodec/flashsv.c +++ b/libavcodec/flashsv.c @@ -366,7 +366,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data, if (s->color_depth != 0 && s->color_depth != 2) { av_log(avctx, AV_LOG_ERROR, "%dx%d invalid color depth %d\n", i, j, s->color_depth); - return -1; + return AVERROR_INVALIDDATA; } if (has_diff) { -- cgit v1.1 From 25c27f379faaf75479111f451a78ac6da71a6e0c Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Thu, 29 Sep 2011 21:57:29 +0200 Subject: doc: fix references to obsolete presets directories for avconv/ffmpeg --- cmdutils.h | 2 +- doc/ffmpeg.texi | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmdutils.h b/cmdutils.h index a20b779..1c17433 100644 --- a/cmdutils.h +++ b/cmdutils.h @@ -334,7 +334,7 @@ int64_t guess_correct_pts(PtsCorrectionContext *ctx, int64_t pts, int64_t dts); * * If is_path is non-zero, look for the file in the path preset_name. * Otherwise search for a file named arg.ffpreset in the directories - * $FFMPEG_DATADIR (if set), $HOME/.ffmpeg, and in the datadir defined + * $AVCONV_DATADIR (if set), $HOME/.avconv, and in the datadir defined * at configuration time, in that order. If no such file is found and * codec_name is defined, then search for a file named * codec_name-preset_name.ffpreset in the above-mentioned directories. diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi index 2457d65..22989c0 100644 --- a/doc/ffmpeg.texi +++ b/doc/ffmpeg.texi @@ -806,8 +806,8 @@ preset options identifies the preset file to use according to the following rules: First ffmpeg searches for a file named @var{arg}.ffpreset in the -directories @file{$FFMPEG_DATADIR} (if set), and @file{$HOME/.ffmpeg}, and in -the datadir defined at configuration time (usually @file{PREFIX/share/ffmpeg}) +directories @file{$AVCONV_DATADIR} (if set), and @file{$HOME/.avconv}, and in +the datadir defined at configuration time (usually @file{PREFIX/share/avconv}) in that order. For example, if the argument is @code{libx264-max}, it will search for the file @file{libx264-max.ffpreset}. -- cgit v1.1 From dff4177546ef78ef7f3a133cf27ecd72b693181d Mon Sep 17 00:00:00 2001 From: Raivo Hool Date: Tue, 11 Oct 2011 10:53:39 +0300 Subject: mov: fix disc/track numbers and totals Signed-off-by: Anton Khirnov --- libavformat/mov.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/libavformat/mov.c b/libavformat/mov.c index 3190afd..f6f95c2 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -81,15 +81,20 @@ typedef struct MOVParseTableEntry { static const MOVParseTableEntry mov_default_parse_table[]; -static int mov_metadata_trkn(MOVContext *c, AVIOContext *pb, unsigned len) +static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb, + unsigned len, const char *key) { char buf[16]; + short current, total; avio_rb16(pb); // unknown - snprintf(buf, sizeof(buf), "%d", avio_rb16(pb)); - av_dict_set(&c->fc->metadata, "track", buf, 0); - - avio_rb16(pb); // total tracks + current = avio_rb16(pb); + total = avio_rb16(pb); + if (!total) + snprintf(buf, sizeof(buf), "%d", current); + else + snprintf(buf, sizeof(buf), "%d/%d", current, total); + av_dict_set(&c->fc->metadata, key, buf, 0); return 0; } @@ -140,7 +145,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) const char *key = NULL; uint16_t str_size, langcode = 0; uint32_t data_type = 0; - int (*parse)(MOVContext*, AVIOContext*, unsigned) = NULL; + int (*parse)(MOVContext*, AVIOContext*, unsigned, const char*) = NULL; switch (atom.type) { case MKTAG(0xa9,'n','a','m'): key = "title"; break; @@ -163,7 +168,9 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) case MKTAG( 't','v','e','n'): key = "episode_id";break; case MKTAG( 't','v','n','n'): key = "network"; break; case MKTAG( 't','r','k','n'): key = "track"; - parse = mov_metadata_trkn; break; + parse = mov_metadata_track_or_disc_number; break; + case MKTAG( 'd','i','s','k'): key = "disc"; + parse = mov_metadata_track_or_disc_number; break; } if (c->itunes_metadata && atom.size > 8) { @@ -198,7 +205,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) str_size = FFMIN3(sizeof(str)-1, str_size, atom.size); if (parse) - parse(c, pb, str_size); + parse(c, pb, str_size, key); else { if (data_type == 3 || (data_type == 0 && langcode < 0x800)) { // MAC Encoded mov_read_mac_string(c, pb, str_size, str, sizeof(str)); -- cgit v1.1 From 2804d320756dfd1d1927299f1c962699f4a39293 Mon Sep 17 00:00:00 2001 From: Raivo Hool Date: Tue, 11 Oct 2011 10:53:40 +0300 Subject: mov: read album_artist atom Signed-off-by: Anton Khirnov --- libavformat/mov.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libavformat/mov.c b/libavformat/mov.c index f6f95c2..9661149 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -151,6 +151,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom) case MKTAG(0xa9,'n','a','m'): key = "title"; break; case MKTAG(0xa9,'a','u','t'): case MKTAG(0xa9,'A','R','T'): key = "artist"; break; + case MKTAG( 'a','A','R','T'): key = "album_artist"; break; case MKTAG(0xa9,'w','r','t'): key = "composer"; break; case MKTAG( 'c','p','r','t'): case MKTAG(0xa9,'c','p','y'): key = "copyright"; break; -- cgit v1.1 From c780b543e72141393ae3c0b0cb2654f9a5e35f73 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Fri, 7 Oct 2011 20:09:56 +0200 Subject: id3v2: fix NULL pointer dereference Bug found by Laurent Aimar fenrir at videolan org --- libavformat/id3v2.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c index c30ab4c..cea0ee0 100644 --- a/libavformat/id3v2.c +++ b/libavformat/id3v2.c @@ -351,7 +351,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t AVIOContext *pbx; unsigned char *buffer = NULL; int buffer_size = 0; - void (*extra_func)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta**) = NULL; + const ID3v2EMFunc *extra_func; switch (version) { case 2: @@ -419,7 +419,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag); avio_skip(s->pb, tlen); /* check for text tag or supported special meta tag */ - } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)->read))) { + } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)))) { if (unsync || tunsync) { int i, j; av_fast_malloc(&buffer, &buffer_size, tlen); @@ -445,7 +445,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t read_ttag(s, pbx, tlen, tag); else /* parse special meta tag */ - extra_func(s, pbx, tlen, tag, extra_meta); + extra_func->read(s, pbx, tlen, tag, extra_meta); } else if (!tag[0]) { if (tag[1]) @@ -508,11 +508,11 @@ void ff_id3v2_read(AVFormatContext *s, const char *magic) void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta) { ID3v2ExtraMeta *current = *extra_meta, *next; - void (*free_func)(ID3v2ExtraMeta*); + const ID3v2EMFunc *extra_func; while (current) { - if ((free_func = get_extra_meta_func(current->tag, 1)->free)) - free_func(current->data); + if ((extra_func = get_extra_meta_func(current->tag, 1))) + extra_func->free(current->data); next = current->next; av_freep(¤t); current = next; -- cgit v1.1 From 5a7ba58657bda249ac625456577651ab98a9d231 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Tue, 11 Oct 2011 10:12:38 +0200 Subject: mov: cosmetics, fix for and if spacing --- libavformat/mov.c | 96 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/libavformat/mov.c b/libavformat/mov.c index 9661149..4f5bb0c 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -271,7 +271,7 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom) int (*parse)(MOVContext*, AVIOContext*, MOVAtom) = NULL; a.size = atom.size; a.type=0; - if(atom.size >= 8) { + if (atom.size >= 8) { a.size = avio_rb32(pb); a.type = avio_rl32(pb); } @@ -288,7 +288,7 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom) break; } a.size -= 8; - if(a.size < 0) + if (a.size < 0) break; a.size = FFMIN(a.size, atom.size - total_size); @@ -455,11 +455,11 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom) if (type == MKTAG('v','i','d','e')) st->codec->codec_type = AVMEDIA_TYPE_VIDEO; - else if(type == MKTAG('s','o','u','n')) + else if (type == MKTAG('s','o','u','n')) st->codec->codec_type = AVMEDIA_TYPE_AUDIO; - else if(type == MKTAG('m','1','a',' ')) + else if (type == MKTAG('m','1','a',' ')) st->codec->codec_id = CODEC_ID_MP2; - else if((type == MKTAG('s','u','b','p')) || (type == MKTAG('c','l','c','p'))) + else if ((type == MKTAG('s','u','b','p')) || (type == MKTAG('c','l','c','p'))) st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE; avio_rb32(pb); /* component manufacture */ @@ -556,7 +556,7 @@ static int mov_read_pasp(MOVContext *c, AVIOContext *pb, MOVAtom atom) /* this atom contains actual media data */ static int mov_read_mdat(MOVContext *c, AVIOContext *pb, MOVAtom atom) { - if(atom.size == 0) /* wrong one (MP4) */ + if (atom.size == 0) /* wrong one (MP4) */ return 0; c->found_mdat=1; return 0; /* now go for moov */ @@ -710,7 +710,7 @@ static int mov_read_smi(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; st = c->fc->streams[c->fc->nb_streams-1]; - if((uint64_t)atom.size > (1<<30)) + if ((uint64_t)atom.size > (1<<30)) return -1; // currently SVQ3 decoder expect full STSD header - so let's fake it @@ -769,10 +769,10 @@ static int mov_read_extradata(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; st= c->fc->streams[c->fc->nb_streams-1]; size= (uint64_t)st->codec->extradata_size + atom.size + 8 + FF_INPUT_BUFFER_PADDING_SIZE; - if(size > INT_MAX || (uint64_t)atom.size > INT_MAX) + if (size > INT_MAX || (uint64_t)atom.size > INT_MAX) return -1; buf= av_realloc(st->codec->extradata, size); - if(!buf) + if (!buf) return -1; st->codec->extradata= buf; buf+= st->codec->extradata_size; @@ -791,7 +791,7 @@ static int mov_read_wave(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; st = c->fc->streams[c->fc->nb_streams-1]; - if((uint64_t)atom.size > (1<<30)) + if ((uint64_t)atom.size > (1<<30)) return -1; if (st->codec->codec_id == CODEC_ID_QDM2 || st->codec->codec_id == CODEC_ID_QDMC) { @@ -822,7 +822,7 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; st = c->fc->streams[c->fc->nb_streams-1]; - if((uint64_t)atom.size > (1<<30)) + if ((uint64_t)atom.size > (1<<30)) return -1; av_free(st->codec->extradata); @@ -849,7 +849,7 @@ static int mov_read_strf(MOVContext *c, AVIOContext *pb, MOVAtom atom) return 0; st = c->fc->streams[c->fc->nb_streams-1]; - if((uint64_t)atom.size > (1<<30)) + if ((uint64_t)atom.size > (1<<30)) return -1; av_free(st->codec->extradata); @@ -878,7 +878,7 @@ static int mov_read_stco(MOVContext *c, AVIOContext *pb, MOVAtom atom) entries = avio_rb32(pb); - if(entries >= UINT_MAX/sizeof(int64_t)) + if (entries >= UINT_MAX/sizeof(int64_t)) return -1; sc->chunk_offsets = av_malloc(entries * sizeof(int64_t)); @@ -887,10 +887,10 @@ static int mov_read_stco(MOVContext *c, AVIOContext *pb, MOVAtom atom) sc->chunk_count = entries; if (atom.type == MKTAG('s','t','c','o')) - for(i=0; ichunk_offsets[i] = avio_rb32(pb); else if (atom.type == MKTAG('c','o','6','4')) - for(i=0; ichunk_offsets[i] = avio_rb64(pb); else return -1; @@ -944,7 +944,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries) st = c->fc->streams[c->fc->nb_streams-1]; sc = st->priv_data; - for(pseudo_stream_id=0; pseudo_stream_id 0) st->codec->codec_type = AVMEDIA_TYPE_VIDEO; - else if(st->codec->codec_type == AVMEDIA_TYPE_DATA){ + else if (st->codec->codec_type == AVMEDIA_TYPE_DATA){ id = ff_codec_get_id(ff_codec_movsubtitle_tags, format); - if(id > 0) + if (id > 0) st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE; } } @@ -1003,7 +1003,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries) (format >> 0) & 0xff, (format >> 8) & 0xff, (format >> 16) & 0xff, (format >> 24) & 0xff, st->codec->codec_type); - if(st->codec->codec_type==AVMEDIA_TYPE_VIDEO) { + if (st->codec->codec_type==AVMEDIA_TYPE_VIDEO) { unsigned int color_depth, len; int color_greyscale; @@ -1106,7 +1106,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries) } sc->has_palette = 1; } - } else if(st->codec->codec_type==AVMEDIA_TYPE_AUDIO) { + } else if (st->codec->codec_type==AVMEDIA_TYPE_AUDIO) { int bits_per_sample, flags; uint16_t version = avio_rb16(pb); @@ -1125,13 +1125,13 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries) //Read QT version 1 fields. In version 0 these do not exist. av_dlog(c->fc, "version =%d, isom =%d\n",version,c->isom); - if(!c->isom) { - if(version==1) { + if (!c->isom) { + if (version==1) { sc->samples_per_frame = avio_rb32(pb); avio_rb32(pb); /* bytes per packet */ sc->bytes_per_frame = avio_rb32(pb); avio_rb32(pb); /* bytes per sample */ - } else if(version==2) { + } else if (version==2) { avio_rb32(pb); /* sizeof struct only */ st->codec->sample_rate = av_int2dbl(avio_rb64(pb)); /* float 64 */ st->codec->channels = avio_rb32(pb); @@ -1186,7 +1186,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries) st->codec->bits_per_coded_sample = bits_per_sample; sc->sample_size = (bits_per_sample >> 3) * st->codec->channels; } - } else if(st->codec->codec_type==AVMEDIA_TYPE_SUBTITLE){ + } else if (st->codec->codec_type==AVMEDIA_TYPE_SUBTITLE){ // ttxt stsd contains display flags, justification, background // color, fonts, and default styles, so fake an atom to read it MOVAtom fake_atom = { .size = size - (avio_tell(pb) - start_pos) }; @@ -1208,7 +1208,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries) avio_skip(pb, a.size); } - if(st->codec->codec_type==AVMEDIA_TYPE_AUDIO && st->codec->sample_rate==0 && sc->time_scale>1) + if (st->codec->codec_type==AVMEDIA_TYPE_AUDIO && st->codec->sample_rate==0 && sc->time_scale>1) st->codec->sample_rate= sc->time_scale; /* special codec parameters handling */ @@ -1297,14 +1297,14 @@ static int mov_read_stsc(MOVContext *c, AVIOContext *pb, MOVAtom atom) av_dlog(c->fc, "track[%i].stsc.entries = %i\n", c->fc->nb_streams-1, entries); - if(entries >= UINT_MAX / sizeof(*sc->stsc_data)) + if (entries >= UINT_MAX / sizeof(*sc->stsc_data)) return -1; sc->stsc_data = av_malloc(entries * sizeof(*sc->stsc_data)); if (!sc->stsc_data) return AVERROR(ENOMEM); sc->stsc_count = entries; - for(i=0; istsc_data[i].first = avio_rb32(pb); sc->stsc_data[i].count = avio_rb32(pb); sc->stsc_data[i].id = avio_rb32(pb); @@ -1359,14 +1359,14 @@ static int mov_read_stss(MOVContext *c, AVIOContext *pb, MOVAtom atom) av_dlog(c->fc, "keyframe_count = %d\n", entries); - if(entries >= UINT_MAX / sizeof(int)) + if (entries >= UINT_MAX / sizeof(int)) return -1; sc->keyframes = av_malloc(entries * sizeof(int)); if (!sc->keyframes) return AVERROR(ENOMEM); sc->keyframe_count = entries; - for(i=0; ikeyframes[i] = avio_rb32(pb); //av_dlog(c->fc, "keyframes[]=%d\n", sc->keyframes[i]); } @@ -1434,7 +1434,7 @@ static int mov_read_stsz(MOVContext *c, AVIOContext *pb, MOVAtom atom) init_get_bits(&gb, buf, 8*num_bytes); - for(i=0; isample_sizes[i] = get_bits_long(&gb, field_size); av_free(buf); @@ -1458,16 +1458,19 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom) avio_rb24(pb); /* flags */ entries = avio_rb32(pb); - av_dlog(c->fc, "track[%i].stts.entries = %i\n", c->fc->nb_streams-1, entries); + av_dlog(c->fc, "track[%i].stts.entries = %i\n", + c->fc->nb_streams-1, entries); - if(entries >= UINT_MAX / sizeof(*sc->stts_data)) + if (entries >= UINT_MAX / sizeof(*sc->stts_data)) return -1; + sc->stts_data = av_malloc(entries * sizeof(*sc->stts_data)); if (!sc->stts_data) return AVERROR(ENOMEM); + sc->stts_count = entries; - for(i=0; istts_data[i].count= sample_count; sc->stts_data[i].duration= sample_duration; - av_dlog(c->fc, "sample_count=%d, sample_duration=%d\n",sample_count,sample_duration); + av_dlog(c->fc, "sample_count=%d, sample_duration=%d\n", + sample_count, sample_duration); duration+=(int64_t)sample_duration*sample_count; total_sample_count+=sample_count; } st->nb_frames= total_sample_count; - if(duration) + if (duration) st->duration= duration; return 0; } @@ -1505,14 +1509,14 @@ static int mov_read_ctts(MOVContext *c, AVIOContext *pb, MOVAtom atom) av_dlog(c->fc, "track[%i].ctts.entries = %i\n", c->fc->nb_streams-1, entries); - if(entries >= UINT_MAX / sizeof(*sc->ctts_data)) + if (entries >= UINT_MAX / sizeof(*sc->ctts_data)) return -1; sc->ctts_data = av_malloc(entries * sizeof(*sc->ctts_data)); if (!sc->ctts_data) return AVERROR(ENOMEM); sc->ctts_count = entries; - for(i=0; isample_size > 0 ? sc->sample_size : sc->sample_sizes[current_sample]; - if(sc->pseudo_stream_id == -1 || + if (sc->pseudo_stream_id == -1 || sc->stsc_data[stsc_index].id - 1 == sc->pseudo_stream_id) { AVIndexEntry *e = &st->index_entries[st->nb_index_entries++]; e->pos = current_offset; @@ -2160,9 +2164,9 @@ static int mov_read_cmov(MOVContext *c, AVIOContext *pb, MOVAtom atom) return AVERROR(ENOMEM); } avio_read(pb, cmov_data, cmov_len); - if(uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, cmov_len) != Z_OK) + if (uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, cmov_len) != Z_OK) goto free_and_return; - if(ffio_init_context(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, NULL) != 0) + if (ffio_init_context(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, NULL) != 0) goto free_and_return; atom.type = MKTAG('m','o','o','v'); atom.size = moov_len; @@ -2191,10 +2195,10 @@ static int mov_read_elst(MOVContext *c, AVIOContext *pb, MOVAtom atom) avio_rb24(pb); /* flags */ edit_count = avio_rb32(pb); /* entries */ - if((uint64_t)edit_count*12+8 > atom.size) + if ((uint64_t)edit_count*12+8 > atom.size) return -1; - for(i=0; i 1) + if (edit_count > 1) av_log(c->fc, AV_LOG_WARNING, "multiple edit list entries, " "a/v desync might occur, patch welcome\n"); @@ -2283,7 +2287,7 @@ static int mov_probe(AVProbeData *p) /* check file header */ offset = 0; - for(;;) { + for (;;) { /* ignore invalid offset */ if ((offset + 8) > (unsigned int)p->buf_size) return score; @@ -2391,7 +2395,7 @@ static int mov_read_header(AVFormatContext *s, AVFormatParameters *ap) mov->fc = s; /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */ - if(pb->seekable) + if (pb->seekable) atom.size = avio_size(pb); else atom.size = INT64_MAX; @@ -2602,7 +2606,7 @@ static int mov_read_close(AVFormatContext *s) } if (mov->dv_demux) { - for(i = 0; i < mov->dv_fctx->nb_streams; i++) { + for (i = 0; i < mov->dv_fctx->nb_streams; i++) { av_freep(&mov->dv_fctx->streams[i]->codec); av_freep(&mov->dv_fctx->streams[i]); } -- cgit v1.1 From 30c3d976f12665d5d13971172aab062a97cb1bce Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Tue, 11 Oct 2011 10:14:06 +0200 Subject: mov: do not misreport empty stts Return -1 instead of ENOMEM if entries is 0. Fixes a av_malloc(0) crash in macosx. --- libavformat/mov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavformat/mov.c b/libavformat/mov.c index 4f5bb0c..0e2ad1f 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -1461,8 +1461,8 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom) av_dlog(c->fc, "track[%i].stts.entries = %i\n", c->fc->nb_streams-1, entries); - if (entries >= UINT_MAX / sizeof(*sc->stts_data)) - return -1; + if (!entries || entries >= UINT_MAX / sizeof(*sc->stts_data)) + return AVERROR(EINVAL); sc->stts_data = av_malloc(entries * sizeof(*sc->stts_data)); if (!sc->stts_data) -- cgit v1.1 From 88d1e2b2b0a129365a62efd666db0394e8ffbe08 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Sat, 8 Oct 2011 02:16:29 +0100 Subject: intfloat_readwrite: fix signed addition overflows These additions might overflow the signed range for large input values. Converting to unsigned before the addition rather than after avoids such undefined behaviour. The result under normal two's complement wraparound remains unchanged. Signed-off-by: Mans Rullgard --- libavutil/intfloat_readwrite.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavutil/intfloat_readwrite.c b/libavutil/intfloat_readwrite.c index 21a1c31..4c8de7b 100644 --- a/libavutil/intfloat_readwrite.c +++ b/libavutil/intfloat_readwrite.c @@ -30,13 +30,13 @@ #include "intfloat_readwrite.h" double av_int2dbl(int64_t v){ - if(v+v > 0xFFEULL<<52) + if((uint64_t)v+v > 0xFFEULL<<52) return NAN; return ldexp(((v&((1LL<<52)-1)) + (1LL<<52)) * (v>>63|1), (v>>52&0x7FF)-1075); } float av_int2flt(int32_t v){ - if(v+v > 0xFF000000U) + if((uint32_t)v+v > 0xFF000000U) return NAN; return ldexp(((v&0x7FFFFF) + (1<<23)) * (v>>31|1), (v>>23&0xFF)-150); } -- cgit v1.1 From 8babfc033ecb6332155c1f8879e54dee41d16952 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 11 Oct 2011 12:58:31 +0100 Subject: h264: fix invalid shifts in init_cavlc_level_tab() The level_code expression includes a shift which is invalid in those cases where the value is not used. Moving the calculation to the branch where the result is used avoids these. Signed-off-by: Mans Rullgard --- libavcodec/h264_cavlc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index 9e7c182..ca7b939 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -238,17 +238,18 @@ static inline int pred_non_zero_count(H264Context *h, int n){ } static av_cold void init_cavlc_level_tab(void){ - int suffix_length, mask; + int suffix_length; unsigned int i; for(suffix_length=0; suffix_length<7; suffix_length++){ for(i=0; i<(1<>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<>1) ^ mask) - mask; if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){ + int level_code = (prefix << suffix_length) + + (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length); + int mask = -(level_code&1); + level_code = (((2 + level_code) >> 1) ^ mask) - mask; cavlc_level_tab[suffix_length][i][0]= level_code; cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length; }else if(prefix + 1 <= LEVEL_TAB_BITS){ -- cgit v1.1 From 92fb52d9060a146f31da6f07ea9ce7867294e153 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 6 Oct 2011 08:03:38 -0700 Subject: prores: extract idct into its own dspcontext and merge with put_pixels. --- libavcodec/Makefile | 2 +- libavcodec/dsputil.c | 65 +++++++++++++++++++++---------------- libavcodec/dsputil.h | 2 ++ libavcodec/proresdec.c | 84 +++++++++++------------------------------------- libavcodec/proresdsp.c | 61 +++++++++++++++++++++++++++++++++++ libavcodec/proresdsp.h | 38 ++++++++++++++++++++++ libavcodec/simple_idct.c | 17 ++++++++++ libavcodec/simple_idct.h | 6 ++++ 8 files changed, 180 insertions(+), 95 deletions(-) create mode 100644 libavcodec/proresdsp.c create mode 100644 libavcodec/proresdsp.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 3c4e2f8..b7b5124 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER) += png.o pngdec.o OBJS-$(CONFIG_PNG_ENCODER) += png.o pngenc.o OBJS-$(CONFIG_PPM_DECODER) += pnmdec.o pnm.o OBJS-$(CONFIG_PPM_ENCODER) += pnmenc.o pnm.o -OBJS-$(CONFIG_PRORES_DECODER) += proresdec.o +OBJS-$(CONFIG_PRORES_DECODER) += proresdec.o proresdsp.o OBJS-$(CONFIG_PTX_DECODER) += ptx.o OBJS-$(CONFIG_QCELP_DECODER) += qcelpdec.o celp_math.o \ celp_filters.o acelp_vectors.o \ diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 967406e..182063c 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s } } +void ff_init_scantable_permutation(uint8_t *idct_permutation, + int idct_permutation_type) +{ + int i; + + switch(idct_permutation_type){ + case FF_NO_IDCT_PERM: + for(i=0; i<64; i++) + idct_permutation[i]= i; + break; + case FF_LIBMPEG2_IDCT_PERM: + for(i=0; i<64; i++) + idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); + break; + case FF_SIMPLE_IDCT_PERM: + for(i=0; i<64; i++) + idct_permutation[i]= simple_mmx_permutation[i]; + break; + case FF_TRANSPOSE_IDCT_PERM: + for(i=0; i<64; i++) + idct_permutation[i]= ((i&7)<<3) | (i>>3); + break; + case FF_PARTTRANS_IDCT_PERM: + for(i=0; i<64; i++) + idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); + break; + case FF_SSE2_IDCT_PERM: + for(i=0; i<64; i++) + idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; + break; + default: + av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); + } +} + static int pix_sum_c(uint8_t * pix, int line_size) { int s, i, j; @@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i]; } - switch(c->idct_permutation_type){ - case FF_NO_IDCT_PERM: - for(i=0; i<64; i++) - c->idct_permutation[i]= i; - break; - case FF_LIBMPEG2_IDCT_PERM: - for(i=0; i<64; i++) - c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); - break; - case FF_SIMPLE_IDCT_PERM: - for(i=0; i<64; i++) - c->idct_permutation[i]= simple_mmx_permutation[i]; - break; - case FF_TRANSPOSE_IDCT_PERM: - for(i=0; i<64; i++) - c->idct_permutation[i]= ((i&7)<<3) | (i>>3); - break; - case FF_PARTTRANS_IDCT_PERM: - for(i=0; i<64; i++) - c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); - break; - case FF_SSE2_IDCT_PERM: - for(i=0; i<64; i++) - c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; - break; - default: - av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); - } + ff_init_scantable_permutation(c->idct_permutation, + c->idct_permutation_type); } diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 73830f8..bef2cdd 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -202,6 +202,8 @@ typedef struct ScanTable{ } ScanTable; void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable); +void ff_init_scantable_permutation(uint8_t *idct_permutation, + int idct_permutation_type); #define EMULATED_EDGE(depth) \ void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\ diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c index c70d145..cbbd068 100644 --- a/libavcodec/proresdec.c +++ b/libavcodec/proresdec.c @@ -34,17 +34,11 @@ #include "libavutil/intmath.h" #include "avcodec.h" -#include "dsputil.h" +#include "proresdsp.h" #include "get_bits.h" -#define BITS_PER_SAMPLE 10 ///< output precision of that decoder -#define BIAS (1 << (BITS_PER_SAMPLE - 1)) ///< bias value for converting signed pixels into unsigned ones -#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8)) ///< minimum value for clipping resulting pixels -#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1 ///< maximum value for clipping resulting pixels - - typedef struct { - DSPContext dsp; + ProresDSPContext dsp; AVFrame picture; ScanTable scantable; int scantable_type; ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced @@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx) avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format - avctx->bits_per_raw_sample = BITS_PER_SAMPLE; - dsputil_init(&ctx->dsp, avctx); + avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE; + ff_proresdsp_init(&ctx->dsp); avctx->coded_frame = &ctx->picture; avcodec_get_frame_defaults(&ctx->picture); @@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out, } -#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX)) - -/** - * Add bias value, clamp and output pixels of a slice - */ -static void put_pixels(const DCTELEM *in, uint16_t *out, int stride, - int mbs_per_slice, int blocks_per_mb) -{ - int mb, x, y, src_offset, dst_offset; - const DCTELEM *src1, *src2; - uint16_t *dst1, *dst2; - - src1 = in; - src2 = in + (blocks_per_mb << 5); - dst1 = out; - dst2 = out + (stride << 3); - - for (mb = 0; mb < mbs_per_slice; mb++) { - for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) { - for (x = 0; x < 8; x++) { - src_offset = (y << 3) + x; - - dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]); - dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]); - - if (blocks_per_mb > 2) { - dst1[dst_offset + x + 8] = - CLIP_AND_BIAS(src1[src_offset + 64]); - dst2[dst_offset + x + 8] = - CLIP_AND_BIAS(src2[src_offset + 64]); - } - } - } - - src1 += blocks_per_mb << 6; - src2 += blocks_per_mb << 6; - dst1 += blocks_per_mb << 2; - dst2 += blocks_per_mb << 2; - } -} - - /** * Decode a slice plane (luma or chroma). */ @@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf, { GetBitContext gb; DCTELEM *block_ptr; - int i, blk_num, blocks_per_slice; + int mb_num, blocks_per_slice; blocks_per_slice = mbs_per_slice * blocks_per_mb; @@ -518,20 +470,20 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf, /* inverse quantization, inverse transform and output */ block_ptr = ctx->blocks; - for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 64) { - /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1 - * and the input of the inverse transform should be scaled by 2 - * in order to avoid rounding errors. - * Due to the fact the existing Libav transforms are incompatible with - * that input I temporally introduced the coarse solution below... */ - for (i = 0; i < 64; i++) - block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2; - - ctx->dsp.idct(block_ptr); + for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) { + ctx->dsp.idct_put(out_ptr, linesize, block_ptr, qmat); + block_ptr += 64; + if (blocks_per_mb > 2) { + ctx->dsp.idct_put(out_ptr + 8, linesize, block_ptr, qmat); + block_ptr += 64; + } + ctx->dsp.idct_put(out_ptr + linesize * 4, linesize, block_ptr, qmat); + block_ptr += 64; + if (blocks_per_mb > 2) { + ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat); + block_ptr += 64; + } } - - put_pixels(ctx->blocks, out_ptr, linesize >> 1, mbs_per_slice, - blocks_per_mb); } diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c new file mode 100644 index 0000000..3038176 --- /dev/null +++ b/libavcodec/proresdsp.c @@ -0,0 +1,61 @@ +/* + * Apple ProRes compatible decoder + * + * Copyright (c) 2010-2011 Maxim Poliakovski + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "proresdsp.h" +#include "simple_idct.h" + +#define BIAS (1 << (PRORES_BITS_PER_SAMPLE - 1)) ///< bias value for converting signed pixels into unsigned ones +#define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8)) ///< minimum value for clipping resulting pixels +#define CLIP_MAX (1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1 ///< maximum value for clipping resulting pixels + +#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX)) + +/** + * Add bias value, clamp and output pixels of a slice + */ +static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in) +{ + int x, y, src_offset, dst_offset; + + for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) { + for (x = 0; x < 8; x++) { + src_offset = (y << 3) + x; + + dst[dst_offset + x] = CLIP_AND_BIAS(in[src_offset]); + } + } +} + +static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat) +{ + ff_prores_idct(block, qmat); + put_pixels(out, linesize >> 1, block); +} + +void ff_proresdsp_init(ProresDSPContext *dsp) +{ + dsp->idct_put = prores_idct_put_c; + dsp->idct_permutation_type = FF_NO_IDCT_PERM; + + ff_init_scantable_permutation(dsp->idct_permutation, + dsp->idct_permutation_type); +} diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h new file mode 100644 index 0000000..18d6bf5 --- /dev/null +++ b/libavcodec/proresdsp.h @@ -0,0 +1,38 @@ +/* + * Apple ProRes compatible decoder + * + * Copyright (c) 2010-2011 Maxim Poliakovski + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_PRORESDSP_H +#define AVCODEC_PRORESDSP_H + +#include "dsputil.h" + +#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder + +typedef struct { + int idct_permutation_type; + uint8_t idct_permutation[64]; + void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat); +} ProresDSPContext; + +void ff_proresdsp_init(ProresDSPContext *dsp); + +#endif /* AVCODEC_PRORESDSP_H */ diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c index b62658b..c6cd908 100644 --- a/libavcodec/simple_idct.c +++ b/libavcodec/simple_idct.c @@ -221,3 +221,20 @@ void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block) idct4col_add(dest + i, line_size, block + i); } } + +void ff_prores_idct(DCTELEM *block, const int16_t *qmat) +{ + int i; + + for (i = 0; i < 64; i++) + block[i] *= qmat[i]; + + for (i = 0; i < 8; i++) + idctRowCondDC_10(block + i*8); + + for (i = 0; i < 64; i++) + block[i] >>= 2; + + for (i = 0; i < 8; i++) + idctSparseCol_10(block + i); +} diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h index a33eb96..6e22158 100644 --- a/libavcodec/simple_idct.h +++ b/libavcodec/simple_idct.h @@ -38,6 +38,12 @@ void ff_simple_idct_8(DCTELEM *block); void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block); void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block); void ff_simple_idct_10(DCTELEM *block); +/** + * Special version of ff_simple_idct_10() which does dequantization + * and scales by a factor of 2 more between the two IDCTs to account + * for larger scale of input coefficients. + */ +void ff_prores_idct(DCTELEM *block, const int16_t *qmat); void ff_simple_idct_mmx(int16_t *block); void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block); -- cgit v1.1 From 6aa3cac6bf561712086ae413a36b5f05087c8887 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 8 Oct 2011 21:40:01 -0700 Subject: swscale: use aligned move for storage into temporary buffer. The intermediate buffer is always aligned. --- libswscale/x86/scale.asm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm index ebaab34..d355894 100644 --- a/libswscale/x86/scale.asm +++ b/libswscale/x86/scale.asm @@ -369,7 +369,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 cvtps2dq m0, m0 %endif ; mmx/sse2/ssse3/sse4 %ifnidn %3, X - movu [r1+r2*(4>>r2shr)], m0 + mova [r1+r2*(4>>r2shr)], m0 %else ; %3 == X movq [r1+r2*4], m0 %endif ; %3 ==/!= X -- cgit v1.1 From e3f530feca80627e278e15fbe3b60cef7a6b630d Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 30 Sep 2011 14:37:11 +0200 Subject: prores: idct sse2/sse4 optimizations. ~3.0-3.5x as fast as original C version, 1.6x as fast overall. --- libavcodec/proresdec.c | 4 +- libavcodec/proresdsp.c | 2 + libavcodec/proresdsp.h | 2 + libavcodec/x86/Makefile | 2 + libavcodec/x86/dsputil_mmx.c | 2 + libavcodec/x86/proresdsp-init.c | 54 +++++ libavcodec/x86/proresdsp.asm | 432 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 496 insertions(+), 2 deletions(-) create mode 100644 libavcodec/x86/proresdsp-init.c create mode 100644 libavcodec/x86/proresdsp.asm diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c index cbbd068..3b539a1 100644 --- a/libavcodec/proresdec.c +++ b/libavcodec/proresdec.c @@ -545,8 +545,8 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num, if (ctx->qmat_changed || sf != ctx->prev_slice_sf) { ctx->prev_slice_sf = sf; for (i = 0; i < 64; i++) { - ctx->qmat_luma_scaled[i] = ctx->qmat_luma[i] * sf; - ctx->qmat_chroma_scaled[i] = ctx->qmat_chroma[i] * sf; + ctx->qmat_luma_scaled[ctx->dsp.idct_permutation[i]] = ctx->qmat_luma[i] * sf; + ctx->qmat_chroma_scaled[ctx->dsp.idct_permutation[i]] = ctx->qmat_chroma[i] * sf; } } diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c index 3038176..7e753e9 100644 --- a/libavcodec/proresdsp.c +++ b/libavcodec/proresdsp.c @@ -56,6 +56,8 @@ void ff_proresdsp_init(ProresDSPContext *dsp) dsp->idct_put = prores_idct_put_c; dsp->idct_permutation_type = FF_NO_IDCT_PERM; + if (HAVE_MMX) ff_proresdsp_x86_init(dsp); + ff_init_scantable_permutation(dsp->idct_permutation, dsp->idct_permutation_type); } diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h index 18d6bf5..8b864fa 100644 --- a/libavcodec/proresdsp.h +++ b/libavcodec/proresdsp.h @@ -35,4 +35,6 @@ typedef struct { void ff_proresdsp_init(ProresDSPContext *dsp); +void ff_proresdsp_x86_init(ProresDSPContext *dsp); + #endif /* AVCODEC_PRORESDSP_H */ diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index a94f97a..ab13109 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -33,6 +33,8 @@ MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_yasm.o MMX-OBJS-$(CONFIG_GPL) += x86/idct_mmx.o MMX-OBJS-$(CONFIG_LPC) += x86/lpc_mmx.o +YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o +MMX-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp-init.o MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 2fb75cb..58620d6 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -64,6 +64,8 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x00400 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_512) = {0x0200020002000200ULL, 0x0200020002000200ULL}; +DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019)= {0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL}; DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0 ) = {0x0000000000000000ULL, 0x0000000000000000ULL}; DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL}; diff --git a/libavcodec/x86/proresdsp-init.c b/libavcodec/x86/proresdsp-init.c new file mode 100644 index 0000000..9760105 --- /dev/null +++ b/libavcodec/x86/proresdsp-init.c @@ -0,0 +1,54 @@ +/* + * Apple ProRes compatible decoder + * + * Copyright (c) 2010-2011 Maxim Poliakovski + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/proresdsp.h" + +void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize, + DCTELEM *block); +void ff_prores_idct_put_10_sse4(uint16_t *dst, int linesize, + DCTELEM *block); +void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize, + DCTELEM *block); + +void ff_proresdsp_x86_init(ProresDSPContext *dsp) +{ +#if ARCH_X86_64 + int flags = av_get_cpu_flags(); + + if (flags & AV_CPU_FLAG_SSE2) { + dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; + dsp->idct_put = ff_prores_idct_put_10_sse2; + } + + if (flags & AV_CPU_FLAG_SSE4) { + dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; + dsp->idct_put = ff_prores_idct_put_10_sse4; + } + +#if HAVE_AVX + if (flags & AV_CPU_FLAG_AVX) { + dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; + dsp->idct_put = ff_prores_idct_put_10_avx; + } +#endif +#endif +} diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm new file mode 100644 index 0000000..9365bf1 --- /dev/null +++ b/libavcodec/x86/proresdsp.asm @@ -0,0 +1,432 @@ +;****************************************************************************** +;* x86-SIMD-optimized IDCT for prores +;* this is identical to "simple" IDCT except for the clip range +;* +;* Copyright (c) 2011 Ronald S. Bultje +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +%define W1sh2 22725 ; W1 = 90901 = 22725<<2 + 1 +%define W2sh2 21407 ; W2 = 85627 = 21407<<2 - 1 +%define W3sh2 19265 ; W3 = 77062 = 19265<<2 + 2 +%define W4sh2 16384 ; W4 = 65535 = 16384<<2 - 1 +%define W5sh2 12873 ; W5 = 51491 = 12873<<2 - 1 +%define W6sh2 8867 ; W6 = 35468 = 8867<<2 +%define W7sh2 4520 ; W7 = 18081 = 4520<<2 + 1 + +%ifdef ARCH_X86_64 + +SECTION_RODATA + +w4_plus_w2: times 4 dw W4sh2, +W2sh2 +w4_min_w2: times 4 dw W4sh2, -W2sh2 +w4_plus_w6: times 4 dw W4sh2, +W6sh2 +w4_min_w6: times 4 dw W4sh2, -W6sh2 +w1_plus_w3: times 4 dw W1sh2, +W3sh2 +w3_min_w1: times 4 dw W3sh2, -W1sh2 +w7_plus_w3: times 4 dw W7sh2, +W3sh2 +w3_min_w7: times 4 dw W3sh2, -W7sh2 +w1_plus_w5: times 4 dw W1sh2, +W5sh2 +w5_min_w1: times 4 dw W5sh2, -W1sh2 +w5_plus_w7: times 4 dw W5sh2, +W7sh2 +w7_min_w5: times 4 dw W7sh2, -W5sh2 +row_round: times 8 dw (1<<14) + +cextern pw_4 +cextern pw_8 +cextern pw_512 +cextern pw_1019 + +section .text align=16 + +; interleave data while maintaining source +; %1=type, %2=dstlo, %3=dsthi, %4=src, %5=interleave +%macro SBUTTERFLY3 5 + punpckl%1 m%2, m%4, m%5 + punpckh%1 m%3, m%4, m%5 +%endmacro + +; %1/%2=src1/dst1, %3/%4=dst2, %5/%6=src2, %7=shift +; action: %3/%4 = %1/%2 - %5/%6; %1/%2 += %5/%6 +; %1/%2/%3/%4 >>= %7; dword -> word (in %1/%3) +%macro SUMSUB_SHPK 7 + psubd %3, %1, %5 ; { a0 - b0 }[0-3] + psubd %4, %2, %6 ; { a0 - b0 }[4-7] + paddd %1, %5 ; { a0 + b0 }[0-3] + paddd %2, %6 ; { a0 + b0 }[4-7] + psrad %1, %7 + psrad %2, %7 + psrad %3, %7 + psrad %4, %7 + packssdw %1, %2 ; row[0] + packssdw %3, %4 ; row[7] +%endmacro + +; %1 = row or col (for rounding variable) +; %2 = number of bits to shift at the end +; %3 = optimization +%macro IDCT_1D 3 + ; a0 = (W4 * row[0]) + (1 << (15 - 1)); + ; a1 = a0; + ; a2 = a0; + ; a3 = a0; + ; a0 += W2 * row[2]; + ; a1 += W6 * row[2]; + ; a2 -= W6 * row[2]; + ; a3 -= W2 * row[2]; +%ifidn %1, col + paddw m10,[pw_8] +%endif + SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[0], row[2] }[0-3]/[4-7] +%ifidn %1, row + psubw m10,[row_round] +%endif + SIGNEXTEND m8, m9, m14 ; { row[2] }[0-3] / [4-7] + SIGNEXTEND m10, m11, m14 ; { row[0] }[0-3] / [4-7] + pmaddwd m2, m0, [w4_plus_w6] + pmaddwd m3, m1, [w4_plus_w6] + pmaddwd m4, m0, [w4_min_w6] + pmaddwd m5, m1, [w4_min_w6] + pmaddwd m6, m0, [w4_min_w2] + pmaddwd m7, m1, [w4_min_w2] + pmaddwd m0, [w4_plus_w2] + pmaddwd m1, [w4_plus_w2] + pslld m2, 2 + pslld m3, 2 + pslld m4, 2 + pslld m5, 2 + pslld m6, 2 + pslld m7, 2 + pslld m0, 2 + pslld m1, 2 + + ; a0: -1*row[0]-1*row[2] + ; a1: -1*row[0] + ; a2: -1*row[0] + ; a3: -1*row[0]+1*row[2] + psubd m2, m10 ; a1[0-3] + psubd m3, m11 ; a1[4-7] + psubd m4, m10 ; a2[0-3] + psubd m5, m11 ; a2[4-7] + psubd m0, m10 + psubd m1, m11 + psubd m6, m10 + psubd m7, m11 + psubd m0, m8 ; a0[0-3] + psubd m1, m9 ; a0[4-7] + paddd m6, m8 ; a3[0-3] + paddd m7, m9 ; a3[4-7] + + ; a0 += W4*row[4] + W6*row[6]; i.e. -1*row[4] + ; a1 -= W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6] + ; a2 -= W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6] + ; a3 += W4*row[4] - W6*row[6]; i.e. -1*row[4] + SBUTTERFLY3 wd, 8, 9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7] + SIGNEXTEND m13, m14, m10 ; { row[4] }[0-3] / [4-7] + pmaddwd m10, m8, [w4_plus_w6] + pmaddwd m11, m9, [w4_plus_w6] + pslld m10, 2 + pslld m11, 2 + psubd m10, m13 + psubd m11, m14 + paddd m0, m10 ; a0[0-3] + paddd m1, m11 ; a0[4-7] + pmaddwd m10, m8, [w4_min_w6] + pmaddwd m11, m9, [w4_min_w6] + pslld m10, 2 + pslld m11, 2 + psubd m10, m13 + psubd m11, m14 + paddd m6, m10 ; a3[0-3] + paddd m7, m11 ; a3[4-7] + pmaddwd m10, m8, [w4_min_w2] + pmaddwd m11, m9, [w4_min_w2] + pmaddwd m8, [w4_plus_w2] + pmaddwd m9, [w4_plus_w2] + pslld m10, 2 + pslld m11, 2 + pslld m8, 2 + pslld m9, 2 + psubd m10, m13 + psubd m11, m14 + psubd m8, m13 + psubd m9, m14 + psubd m4, m10 ; a2[0-3] intermediate + psubd m5, m11 ; a2[4-7] intermediate + psubd m2, m8 ; a1[0-3] intermediate + psubd m3, m9 ; a1[4-7] intermediate + SIGNEXTEND m12, m13, m10 ; { row[6] }[0-3] / [4-7] + psubd m4, m12 ; a2[0-3] + psubd m5, m13 ; a2[4-7] + paddd m2, m12 ; a1[0-3] + paddd m3, m13 ; a1[4-7] + + ; load/store + mova [r2+ 0], m0 + mova [r2+ 32], m2 + mova [r2+ 64], m4 + mova [r2+ 96], m6 + mova m10,[r2+ 16] ; { row[1] }[0-7] + mova m8, [r2+ 48] ; { row[3] }[0-7] + mova m13,[r2+ 80] ; { row[5] }[0-7] + mova m14,[r2+112] ; { row[7] }[0-7] + mova [r2+ 16], m1 + mova [r2+ 48], m3 + mova [r2+ 80], m5 + mova [r2+112], m7 +%ifidn %1, row + pmullw m10,[r3+ 16] + pmullw m8, [r3+ 48] + pmullw m13,[r3+ 80] + pmullw m14,[r3+112] +%endif + + ; b0 = MUL(W1, row[1]); + ; MAC(b0, W3, row[3]); + ; b1 = MUL(W3, row[1]); + ; MAC(b1, -W7, row[3]); + ; b2 = MUL(W5, row[1]); + ; MAC(b2, -W1, row[3]); + ; b3 = MUL(W7, row[1]); + ; MAC(b3, -W5, row[3]); + SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[1], row[3] }[0-3]/[4-7] + SIGNEXTEND m10, m11, m12 ; { row[1] }[0-3] / [4-7] + SIGNEXTEND m8, m9, m12 ; { row[3] }[0-3] / [4-7] + pmaddwd m2, m0, [w3_min_w7] + pmaddwd m3, m1, [w3_min_w7] + pmaddwd m4, m0, [w5_min_w1] + pmaddwd m5, m1, [w5_min_w1] + pmaddwd m6, m0, [w7_min_w5] + pmaddwd m7, m1, [w7_min_w5] + pmaddwd m0, [w1_plus_w3] + pmaddwd m1, [w1_plus_w3] + pslld m2, 2 + pslld m3, 2 + pslld m4, 2 + pslld m5, 2 + pslld m6, 2 + pslld m7, 2 + pslld m0, 2 + pslld m1, 2 + + ; b0: +1*row[1]+2*row[3] + ; b1: +2*row[1]-1*row[3] + ; b2: -1*row[1]-1*row[3] + ; b3: +1*row[1]+1*row[3] + psubd m2, m8 + psubd m3, m9 + paddd m0, m8 + paddd m1, m9 + paddd m8, m10 ; { row[1] + row[3] }[0-3] + paddd m9, m11 ; { row[1] + row[3] }[4-7] + paddd m10, m10 + paddd m11, m11 + paddd m0, m8 ; b0[0-3] + paddd m1, m9 ; b0[4-7] + paddd m2, m10 ; b1[0-3] + paddd m3, m11 ; b2[4-7] + psubd m4, m8 ; b2[0-3] + psubd m5, m9 ; b2[4-7] + paddd m6, m8 ; b3[0-3] + paddd m7, m9 ; b3[4-7] + + ; MAC(b0, W5, row[5]); + ; MAC(b0, W7, row[7]); + ; MAC(b1, -W1, row[5]); + ; MAC(b1, -W5, row[7]); + ; MAC(b2, W7, row[5]); + ; MAC(b2, W3, row[7]); + ; MAC(b3, W3, row[5]); + ; MAC(b3, -W1, row[7]); + SBUTTERFLY3 wd, 8, 9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7] + SIGNEXTEND m13, m12, m11 ; { row[5] }[0-3] / [4-7] + SIGNEXTEND m14, m11, m10 ; { row[7] }[0-3] / [4-7] + + ; b0: -1*row[5]+1*row[7] + ; b1: -1*row[5]+1*row[7] + ; b2: +1*row[5]+2*row[7] + ; b3: +2*row[5]-1*row[7] + paddd m4, m13 + paddd m5, m12 + paddd m6, m13 + paddd m7, m12 + psubd m13, m14 ; { row[5] - row[7] }[0-3] + psubd m12, m11 ; { row[5] - row[7] }[4-7] + paddd m14, m14 + paddd m11, m11 + psubd m0, m13 + psubd m1, m12 + psubd m2, m13 + psubd m3, m12 + paddd m4, m14 + paddd m5, m11 + paddd m6, m13 + paddd m7, m12 + + pmaddwd m10, m8, [w1_plus_w5] + pmaddwd m11, m9, [w1_plus_w5] + pmaddwd m12, m8, [w5_plus_w7] + pmaddwd m13, m9, [w5_plus_w7] + pslld m10, 2 + pslld m11, 2 + pslld m12, 2 + pslld m13, 2 + psubd m2, m10 ; b1[0-3] + psubd m3, m11 ; b1[4-7] + paddd m0, m12 ; b0[0-3] + paddd m1, m13 ; b0[4-7] + pmaddwd m12, m8, [w7_plus_w3] + pmaddwd m13, m9, [w7_plus_w3] + pmaddwd m8, [w3_min_w1] + pmaddwd m9, [w3_min_w1] + pslld m12, 2 + pslld m13, 2 + pslld m8, 2 + pslld m9, 2 + paddd m4, m12 ; b2[0-3] + paddd m5, m13 ; b2[4-7] + paddd m6, m8 ; b3[0-3] + paddd m7, m9 ; b3[4-7] + + ; row[0] = (a0 + b0) >> 15; + ; row[7] = (a0 - b0) >> 15; + ; row[1] = (a1 + b1) >> 15; + ; row[6] = (a1 - b1) >> 15; + ; row[2] = (a2 + b2) >> 15; + ; row[5] = (a2 - b2) >> 15; + ; row[3] = (a3 + b3) >> 15; + ; row[4] = (a3 - b3) >> 15; + mova m8, [r2+ 0] ; a0[0-3] + mova m9, [r2+16] ; a0[4-7] + SUMSUB_SHPK m8, m9, m10, m11, m0, m1, %2 + mova m0, [r2+32] ; a1[0-3] + mova m1, [r2+48] ; a1[4-7] + SUMSUB_SHPK m0, m1, m9, m11, m2, m3, %2 + mova m1, [r2+64] ; a2[0-3] + mova m2, [r2+80] ; a2[4-7] + SUMSUB_SHPK m1, m2, m11, m3, m4, m5, %2 + mova m2, [r2+96] ; a3[0-3] + mova m3, [r2+112] ; a3[4-7] + SUMSUB_SHPK m2, m3, m4, m5, m6, m7, %2 +%endmacro + +; void prores_idct_put_10_(uint8_t *pixels, int stride, +; DCTELEM *block, const int16_t *qmat); +%macro idct_put_fn 2 +cglobal prores_idct_put_10_%1, 4, 4, %2 + movsxd r1, r1d + pxor m15, m15 ; zero + + ; for (i = 0; i < 8; i++) + ; idctRowCondDC(block + i*8); + mova m10,[r2+ 0] ; { row[0] }[0-7] + mova m8, [r2+32] ; { row[2] }[0-7] + mova m13,[r2+64] ; { row[4] }[0-7] + mova m12,[r2+96] ; { row[6] }[0-7] + + pmullw m10,[r3+ 0] + pmullw m8, [r3+32] + pmullw m13,[r3+64] + pmullw m12,[r3+96] + + IDCT_1D row, 17, %1 + + ; transpose for second part of IDCT + TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3 + mova [r2+ 16], m0 + mova [r2+ 48], m2 + mova [r2+ 80], m11 + mova [r2+112], m10 + SWAP 8, 10 + SWAP 1, 8 + SWAP 4, 13 + SWAP 9, 12 + + ; for (i = 0; i < 8; i++) + ; idctSparseColAdd(dest + i, line_size, block + i); + IDCT_1D col, 20, %1 + + ; clip/store + mova m6, [pw_512] + mova m3, [pw_4] + mova m5, [pw_1019] + paddw m8, m6 + paddw m0, m6 + paddw m1, m6 + paddw m2, m6 + paddw m4, m6 + paddw m11, m6 + paddw m9, m6 + paddw m10, m6 + pmaxsw m8, m3 + pmaxsw m0, m3 + pmaxsw m1, m3 + pmaxsw m2, m3 + pmaxsw m4, m3 + pmaxsw m11, m3 + pmaxsw m9, m3 + pmaxsw m10, m3 + pminsw m8, m5 + pminsw m0, m5 + pminsw m1, m5 + pminsw m2, m5 + pminsw m4, m5 + pminsw m11, m5 + pminsw m9, m5 + pminsw m10, m5 + + lea r2, [r1*3] + mova [r0 ], m8 + mova [r0+r1 ], m0 + mova [r0+r1*2], m1 + mova [r0+r2 ], m2 + lea r0, [r0+r1*4] + mova [r0 ], m4 + mova [r0+r1 ], m11 + mova [r0+r1*2], m9 + mova [r0+r2 ], m10 + RET +%endmacro + +%macro signextend_sse2 3 ; dstlow, dsthigh, tmp + pxor %3, %3 + pcmpgtw %3, %1 + mova %2, %1 + punpcklwd %1, %3 + punpckhwd %2, %3 +%endmacro + +%macro signextend_sse4 2-3 ; dstlow, dsthigh + movhlps %2, %1 + pmovsxwd %1, %1 + pmovsxwd %2, %2 +%endmacro + +INIT_XMM +%define SIGNEXTEND signextend_sse2 +idct_put_fn sse2, 16 +INIT_XMM +%define SIGNEXTEND signextend_sse4 +idct_put_fn sse4, 16 +INIT_AVX +idct_put_fn avx, 16 + +%endif -- cgit v1.1 From 91f4732913f657f426f2707c45b9026e12b04eb2 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 6 Oct 2011 07:17:44 -0700 Subject: prores: implement multithreading. 60% speedup (overall decoding time) at 2 threads, and another 60% speedup at 4 threads. --- libavcodec/proresdec.c | 71 +++++++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c index 3b539a1..83c083f 100644 --- a/libavcodec/proresdec.c +++ b/libavcodec/proresdec.c @@ -38,6 +38,14 @@ #include "get_bits.h" typedef struct { + const uint8_t *index; ///< pointers to the data of this slice + int slice_num; + int x_pos, y_pos; + int slice_width; + DECLARE_ALIGNED(16, DCTELEM, blocks[8 * 4 * 64]); +} ProresThreadData; + +typedef struct { ProresDSPContext dsp; AVFrame picture; ScanTable scantable; @@ -51,9 +59,9 @@ typedef struct { int prev_slice_sf; ///< scalefactor of the previous decoded slice DECLARE_ALIGNED(16, int16_t, qmat_luma_scaled[64]); DECLARE_ALIGNED(16, int16_t, qmat_chroma_scaled[64]); - DECLARE_ALIGNED(16, DCTELEM, blocks[8 * 4 * 64]); int total_slices; ///< total number of slices in a picture - const uint8_t **slice_data_index; ///< array of pointers to the data of each slice + ProresThreadData *slice_data; + int pic_num; int chroma_factor; int mb_chroma_factor; int num_chroma_blocks; ///< number of chrominance blocks in a macroblock @@ -94,7 +102,7 @@ static av_cold int decode_init(AVCodecContext *avctx) ProresContext *ctx = avctx->priv_data; ctx->total_slices = 0; - ctx->slice_data_index = 0; + ctx->slice_data = NULL; avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format @@ -265,9 +273,9 @@ static int decode_picture_header(ProresContext *ctx, const uint8_t *buf, } if (ctx->total_slices != num_slices) { - av_freep(&ctx->slice_data_index); - ctx->slice_data_index = av_malloc((num_slices + 1) * sizeof(uint8_t*)); - if (!ctx->slice_data_index) + av_freep(&ctx->slice_data); + ctx->slice_data = av_malloc((num_slices + 1) * sizeof(ctx->slice_data[0])); + if (!ctx->slice_data) return AVERROR(ENOMEM); ctx->total_slices = num_slices; } @@ -282,10 +290,10 @@ static int decode_picture_header(ProresContext *ctx, const uint8_t *buf, data_ptr = index_ptr + num_slices * 2; for (i = 0; i < num_slices; i++) { - ctx->slice_data_index[i] = data_ptr; + ctx->slice_data[i].index = data_ptr; data_ptr += AV_RB16(index_ptr + i * 2); } - ctx->slice_data_index[i] = data_ptr; + ctx->slice_data[i].index = data_ptr; if (data_ptr > buf + data_size) { av_log(avctx, AV_LOG_ERROR, "out of slice data\n"); @@ -446,7 +454,8 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out, /** * Decode a slice plane (luma or chroma). */ -static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf, +static void decode_slice_plane(ProresContext *ctx, ProresThreadData *td, + const uint8_t *buf, int data_size, uint16_t *out_ptr, int linesize, int mbs_per_slice, int blocks_per_mb, int plane_size_factor, @@ -458,17 +467,17 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf, blocks_per_slice = mbs_per_slice * blocks_per_mb; - memset(ctx->blocks, 0, 8 * 4 * 64 * sizeof(*ctx->blocks)); + memset(td->blocks, 0, 8 * 4 * 64 * sizeof(*td->blocks)); init_get_bits(&gb, buf, data_size << 3); - decode_dc_coeffs(&gb, ctx->blocks, blocks_per_slice); + decode_dc_coeffs(&gb, td->blocks, blocks_per_slice); - decode_ac_coeffs(&gb, ctx->blocks, blocks_per_slice, + decode_ac_coeffs(&gb, td->blocks, blocks_per_slice, plane_size_factor, ctx->scantable.permutated); /* inverse quantization, inverse transform and output */ - block_ptr = ctx->blocks; + block_ptr = td->blocks; for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) { ctx->dsp.idct_put(out_ptr, linesize, block_ptr, qmat); @@ -487,10 +496,14 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf, } -static int decode_slice(ProresContext *ctx, int pic_num, int slice_num, - int mb_x_pos, int mb_y_pos, int mbs_per_slice, - AVCodecContext *avctx) +static int decode_slice(AVCodecContext *avctx, ProresThreadData *td) { + ProresContext *ctx = avctx->priv_data; + int mb_x_pos = td->x_pos; + int mb_y_pos = td->y_pos; + int pic_num = ctx->pic_num; + int slice_num = td->slice_num; + int mbs_per_slice = td->slice_width; const uint8_t *buf; uint8_t *y_data, *u_data, *v_data; AVFrame *pic = avctx->coded_frame; @@ -498,8 +511,8 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num, int slice_data_size, hdr_size, y_data_size, u_data_size, v_data_size; int y_linesize, u_linesize, v_linesize; - buf = ctx->slice_data_index[slice_num]; - slice_data_size = ctx->slice_data_index[slice_num + 1] - buf; + buf = ctx->slice_data[slice_num].index; + slice_data_size = ctx->slice_data[slice_num + 1].index - buf; slice_width_factor = av_log2(mbs_per_slice); @@ -551,14 +564,14 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num, } /* decode luma plane */ - decode_slice_plane(ctx, buf + hdr_size, y_data_size, + decode_slice_plane(ctx, td, buf + hdr_size, y_data_size, (uint16_t*) (y_data + (mb_y_pos << 4) * y_linesize + (mb_x_pos << 5)), y_linesize, mbs_per_slice, 4, slice_width_factor + 2, ctx->qmat_luma_scaled); /* decode U chroma plane */ - decode_slice_plane(ctx, buf + hdr_size + y_data_size, u_data_size, + decode_slice_plane(ctx, td, buf + hdr_size + y_data_size, u_data_size, (uint16_t*) (u_data + (mb_y_pos << 4) * u_linesize + (mb_x_pos << ctx->mb_chroma_factor)), u_linesize, mbs_per_slice, ctx->num_chroma_blocks, @@ -566,7 +579,7 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num, ctx->qmat_chroma_scaled); /* decode V chroma plane */ - decode_slice_plane(ctx, buf + hdr_size + y_data_size + u_data_size, + decode_slice_plane(ctx, td, buf + hdr_size + y_data_size + u_data_size, v_data_size, (uint16_t*) (v_data + (mb_y_pos << 4) * v_linesize + (mb_x_pos << ctx->mb_chroma_factor)), @@ -585,6 +598,7 @@ static int decode_picture(ProresContext *ctx, int pic_num, slice_num = 0; + ctx->pic_num = pic_num; for (y_pos = 0; y_pos < ctx->num_y_mbs; y_pos++) { slice_width = 1 << ctx->slice_width_factor; @@ -593,15 +607,18 @@ static int decode_picture(ProresContext *ctx, int pic_num, while (ctx->num_x_mbs - x_pos < slice_width) slice_width >>= 1; - if (decode_slice(ctx, pic_num, slice_num, x_pos, y_pos, - slice_width, avctx) < 0) - return -1; + ctx->slice_data[slice_num].slice_num = slice_num; + ctx->slice_data[slice_num].x_pos = x_pos; + ctx->slice_data[slice_num].y_pos = y_pos; + ctx->slice_data[slice_num].slice_width = slice_width; slice_num++; } } - return 0; + return avctx->execute(avctx, (void *) decode_slice, + ctx->slice_data, NULL, slice_num, + sizeof(ctx->slice_data[0])); } @@ -664,7 +681,7 @@ static av_cold int decode_close(AVCodecContext *avctx) if (ctx->picture.data[0]) avctx->release_buffer(avctx, &ctx->picture); - av_freep(&ctx->slice_data_index); + av_freep(&ctx->slice_data); return 0; } @@ -678,6 +695,6 @@ AVCodec ff_prores_decoder = { .init = decode_init, .close = decode_close, .decode = decode_frame, - .capabilities = CODEC_CAP_DR1, + .capabilities = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS, .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)") }; -- cgit v1.1 From fdba370f8a1bdfc22ecbdf3c7148c2f8680a4ac4 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 11 Oct 2011 16:00:21 +0100 Subject: h264: fix HRD parameters parsing The bit_rate_value_minus1 and cpb_size_value_minus1 elements allow a wider range than get_ue_golomb() supports. This adds a get_ue_golomb_long() function supporting up to 31 leading zeros, which is the maximum for these syntax elements, and uses it in decode_hrd_parameters(). Signed-off-by: Mans Rullgard --- libavcodec/golomb.h | 14 ++++++++++++++ libavcodec/h264_ps.c | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h index 83d277f..503aa14 100644 --- a/libavcodec/golomb.h +++ b/libavcodec/golomb.h @@ -75,6 +75,20 @@ static inline int get_ue_golomb(GetBitContext *gb){ } } +/** + * Read an unsigned Exp-Golomb code in the range 0 to UINT32_MAX-1. + */ +static inline unsigned get_ue_golomb_long(GetBitContext *gb) +{ + unsigned buf, log; + + buf = show_bits_long(gb, 32); + log = 31 - av_log2(buf); + skip_bits_long(gb, log); + + return get_bits_long(gb, log + 1) - 1; +} + /** * read unsigned exp golomb code, constraint to a max of 31. * the return value is undefined if the stored value exceeds 31. diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index bb673e9..677ca80 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -130,8 +130,8 @@ static inline int decode_hrd_parameters(H264Context *h, SPS *sps){ get_bits(&s->gb, 4); /* bit_rate_scale */ get_bits(&s->gb, 4); /* cpb_size_scale */ for(i=0; igb); /* bit_rate_value_minus1 */ - get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */ + get_ue_golomb_long(&s->gb); /* bit_rate_value_minus1 */ + get_ue_golomb_long(&s->gb); /* cpb_size_value_minus1 */ get_bits1(&s->gb); /* cbr_flag */ } sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1; -- cgit v1.1 From 5f3fb599536dd5bceb1d45cb73cd0b0ce3e5560c Mon Sep 17 00:00:00 2001 From: John Stebbins Date: Tue, 11 Oct 2011 14:57:58 -0400 Subject: fix AC3ENC_OPT_MODE_ON/OFF The values were reversed. Signed-off-by: Justin Ruggles --- libavcodec/ac3enc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h index bf5ccea..4a01749 100644 --- a/libavcodec/ac3enc.h +++ b/libavcodec/ac3enc.h @@ -73,8 +73,8 @@ typedef int64_t CoefSumType; #define AC3ENC_OPT_OFF 0 #define AC3ENC_OPT_ON 1 #define AC3ENC_OPT_NOT_INDICATED 0 -#define AC3ENC_OPT_MODE_ON 1 -#define AC3ENC_OPT_MODE_OFF 2 +#define AC3ENC_OPT_MODE_ON 2 +#define AC3ENC_OPT_MODE_OFF 1 /* specific option values */ #define AC3ENC_OPT_LARGE_ROOM 1 -- cgit v1.1