From cb668476ab1343d27e03edc0b32f57ca7a187471 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sun, 9 Oct 2011 20:18:34 +0100
Subject: motion_est: make MotionEstContext.map_generation unsigned

The way this value is used, it should be an unsigned type.
While the numerical value has no meaning, unsigned wraparound
is relied upon.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/motion_est.c          |  2 +-
 libavcodec/motion_est_template.c | 36 ++++++++++++++++++------------------
 libavcodec/mpegvideo.h           |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index d0f9367..c4512a9 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -52,7 +52,7 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
                                   int src_index, int ref_index,
                                   int size, int h);
 
-static inline int update_map_generation(MotionEstContext *c)
+static inline unsigned update_map_generation(MotionEstContext *c)
 {
     c->map_generation+= 1<<(ME_MAP_MV_BITS*2);
     if(c->map_generation==0){
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index 88e23ae..d0d4b41 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -89,8 +89,8 @@ static int hpel_motion_search(MpegEncContext * s,
         const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
                      + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
 
-        int key;
-        int map_generation= c->map_generation;
+        unsigned key;
+        unsigned map_generation= c->map_generation;
 #ifndef NDEBUG
         uint32_t *map= c->map;
 #endif
@@ -208,7 +208,7 @@ static int qpel_motion_search(MpegEncContext * s,
     const int mx = *mx_ptr;
     const int my = *my_ptr;
     const int penalty_factor= c->sub_penalty_factor;
-    const int map_generation= c->map_generation;
+    const unsigned map_generation = c->map_generation;
     const int subpel_quality= c->avctx->me_subpel_quality;
     uint32_t *map= c->map;
     me_cmp_func cmpf, chroma_cmpf;
@@ -354,7 +354,7 @@ static int qpel_motion_search(MpegEncContext * s,
 
 #define CHECK_MV(x,y)\
 {\
-    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+    const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
     const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
     assert((x) >= xmin);\
     assert((x) <= xmax);\
@@ -382,7 +382,7 @@ static int qpel_motion_search(MpegEncContext * s,
 
 #define CHECK_MV_DIR(x,y,new_dir)\
 {\
-    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+    const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
     const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
 /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
     if(map[index]!=key){\
@@ -420,13 +420,13 @@ static av_always_inline int small_diamond_search(MpegEncContext * s, int *best,
     int next_dir=-1;
     LOAD_COMMON
     LOAD_COMMON2
-    int map_generation= c->map_generation;
+    unsigned map_generation = c->map_generation;
 
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
     { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
-        const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
+        const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
         const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
         if(map[index]!=key){ //this will be executed only very rarey
             score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
@@ -462,7 +462,7 @@ static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
     int dia_size;
     LOAD_COMMON
     LOAD_COMMON2
-    int map_generation= c->map_generation;
+    unsigned map_generation = c->map_generation;
 
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
@@ -503,7 +503,7 @@ static int hex_search(MpegEncContext * s, int *best, int dmin,
     me_cmp_func cmpf, chroma_cmpf;
     LOAD_COMMON
     LOAD_COMMON2
-    int map_generation= c->map_generation;
+    unsigned map_generation = c->map_generation;
     int x,y,d;
     const int dec= dia_size & (dia_size-1);
 
@@ -537,7 +537,7 @@ static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
     me_cmp_func cmpf, chroma_cmpf;
     LOAD_COMMON
     LOAD_COMMON2
-    int map_generation= c->map_generation;
+    unsigned map_generation = c->map_generation;
     int x,y,i,d;
     int dia_size= c->dia_size&0xFF;
     const int dec= dia_size & (dia_size-1);
@@ -575,7 +575,7 @@ static int umh_search(MpegEncContext * s, int *best, int dmin,
     me_cmp_func cmpf, chroma_cmpf;
     LOAD_COMMON
     LOAD_COMMON2
-    int map_generation= c->map_generation;
+    unsigned map_generation = c->map_generation;
     int x,y,x2,y2, i, j, d;
     const int dia_size= c->dia_size&0xFE;
     static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
@@ -622,7 +622,7 @@ static int full_search(MpegEncContext * s, int *best, int dmin,
     me_cmp_func cmpf, chroma_cmpf;
     LOAD_COMMON
     LOAD_COMMON2
-    int map_generation= c->map_generation;
+    unsigned map_generation = c->map_generation;
     int x,y, d;
     const int dia_size= c->dia_size&0xFF;
 
@@ -651,7 +651,7 @@ static int full_search(MpegEncContext * s, int *best, int dmin,
 
 #define SAB_CHECK_MV(ax,ay)\
 {\
-    const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
+    const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
     const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
 /*printf("sab check %d %d\n", ax, ay);*/\
     if(map[index]!=key){\
@@ -690,7 +690,7 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
     int i, j;
     LOAD_COMMON
     LOAD_COMMON2
-    int map_generation= c->map_generation;
+    unsigned map_generation = c->map_generation;
 
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
@@ -775,7 +775,7 @@ static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
     int dia_size;
     LOAD_COMMON
     LOAD_COMMON2
-    int map_generation= c->map_generation;
+    unsigned map_generation = c->map_generation;
 
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
@@ -867,7 +867,7 @@ static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int
     int d;                   ///< the score (cmp + penalty) of any given mv
     int dmin;                /**< the best value of d, i.e. the score
                                corresponding to the mv stored in best[]. */
-    int map_generation;
+    unsigned map_generation;
     int penalty_factor;
     const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
     const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
@@ -995,7 +995,7 @@ static int epzs_motion_search4(MpegEncContext * s,
     MotionEstContext * const c= &s->me;
     int best[2]={0, 0};
     int d, dmin;
-    int map_generation;
+    unsigned map_generation;
     const int penalty_factor= c->penalty_factor;
     const int size=1;
     const int h=8;
@@ -1055,7 +1055,7 @@ static int epzs_motion_search2(MpegEncContext * s,
     MotionEstContext * const c= &s->me;
     int best[2]={0, 0};
     int d, dmin;
-    int map_generation;
+    unsigned map_generation;
     const int penalty_factor= c->penalty_factor;
     const int size=0; //FIXME pass as arg
     const int h=8;
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index d0f4bfd..7de7217 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -154,7 +154,7 @@ typedef struct MotionEstContext{
     int best_bits;
     uint32_t *map;                     ///< map to avoid duplicate evaluations
     uint32_t *score_map;               ///< map to store the scores
-    int map_generation;
+    unsigned map_generation;
     int pre_penalty_factor;
     int penalty_factor;                /**< an estimate of the bits required to
                                         code a given mv value, e.g. (1,0) takes
-- 
cgit v1.1


From c79d2a20bad59298188171f1316a830d563a41ee Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Mon, 10 Oct 2011 20:41:31 +0100
Subject: sipr: fix get_bits(0) calls

Zero-length get_bits() is undefined, must check before calling.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/sipr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libavcodec/sipr.c b/libavcodec/sipr.c
index f57ec4f..08dd63a 100644
--- a/libavcodec/sipr.c
+++ b/libavcodec/sipr.c
@@ -194,14 +194,16 @@ static void decode_parameters(SiprParameters* parms, GetBitContext *pgb,
 {
     int i, j;
 
-    parms->ma_pred_switch           = get_bits(pgb, p->ma_predictor_bits);
+    if (p->ma_predictor_bits)
+        parms->ma_pred_switch       = get_bits(pgb, p->ma_predictor_bits);
 
     for (i = 0; i < 5; i++)
         parms->vq_indexes[i]        = get_bits(pgb, p->vq_indexes_bits[i]);
 
     for (i = 0; i < p->subframe_count; i++) {
         parms->pitch_delay[i]       = get_bits(pgb, p->pitch_delay_bits[i]);
-        parms->gp_index[i]          = get_bits(pgb, p->gp_index_bits);
+        if (p->gp_index_bits)
+            parms->gp_index[i]      = get_bits(pgb, p->gp_index_bits);
 
         for (j = 0; j < p->number_of_fc_indexes; j++)
             parms->fc_indexes[i][j] = get_bits(pgb, p->fc_index_bits[j]);
-- 
cgit v1.1


From be1242a3f2b28e9cb08515bdc1db6c14403c279a Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 11 Oct 2011 00:58:03 +0100
Subject: h264: fix detection of optional trailing PPS elements

The PPS may contain a few trailing elements whose presence is
only signalled by data remaining after the mandatory part
has been parsed.  The current code fails to take into account
the rbsp_trailing_bits() when deciding whether to parse these
optional elements.  Assuming no unnecessary padding bytes are
passed to this function, the optional elements are present if
either more than 8 extra bits remain or the remaining bits do
not form a valid rbsp_trailing_bits() after the mandatory PPS
elements have been parsed.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/h264_ps.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index 7491807..bb673e9 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -462,6 +462,7 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
     unsigned int pps_id= get_ue_golomb(&s->gb);
     PPS *pps;
     const int qp_bd_offset = 6*(h->sps.bit_depth_luma-8);
+    int bits_left;
 
     if(pps_id >= MAX_PPS_COUNT) {
         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
@@ -538,7 +539,9 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
     memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
     memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
 
-    if(get_bits_count(&s->gb) < bit_length){
+    bits_left = bit_length - get_bits_count(&s->gb);
+    if (bits_left && (bits_left > 8 ||
+                      show_bits(&s->gb, bits_left) != 1 << (bits_left - 1))) {
         pps->transform_8x8_mode= get_bits1(&s->gb);
         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
         pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
-- 
cgit v1.1


From a7984a6a6dbdfbd95df4a669a452ddf34c485cab Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 21 Sep 2011 11:34:26 -0400
Subject: smacker: Separate audio flags from sample rates in smacker demuxer.

Makes the code easier to understand.
---
 libavformat/smacker.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/libavformat/smacker.c b/libavformat/smacker.c
index 995ef41..447e6e6 100644
--- a/libavformat/smacker.c
+++ b/libavformat/smacker.c
@@ -31,11 +31,11 @@
 #define SMACKER_FLAG_RING_FRAME 0x01
 
 enum SAudFlags {
-    SMK_AUD_PACKED  = 0x80000000,
-    SMK_AUD_16BITS  = 0x20000000,
-    SMK_AUD_STEREO  = 0x10000000,
-    SMK_AUD_BINKAUD = 0x08000000,
-    SMK_AUD_USEDCT  = 0x04000000
+    SMK_AUD_PACKED  = 0x80,
+    SMK_AUD_16BITS  = 0x20,
+    SMK_AUD_STEREO  = 0x10,
+    SMK_AUD_BINKAUD = 0x08,
+    SMK_AUD_USEDCT  = 0x04
 };
 
 typedef struct SmackerContext {
@@ -48,6 +48,7 @@ typedef struct SmackerContext {
     uint32_t audio[7];
     uint32_t treesize;
     uint32_t mmap_size, mclr_size, full_size, type_size;
+    uint8_t  aflags[7];
     uint32_t rates[7];
     uint32_t pad;
     /* frame info */
@@ -129,8 +130,10 @@ static int smacker_read_header(AVFormatContext *s, AVFormatParameters *ap)
     smk->mclr_size = avio_rl32(pb);
     smk->full_size = avio_rl32(pb);
     smk->type_size = avio_rl32(pb);
-    for(i = 0; i < 7; i++)
-        smk->rates[i] = avio_rl32(pb);
+    for(i = 0; i < 7; i++) {
+        smk->rates[i]  = avio_rl24(pb);
+        smk->aflags[i] = avio_r8(pb);
+    }
     smk->pad = avio_rl32(pb);
     /* setup data */
     if(smk->frames > 0xFFFFFF) {
@@ -173,23 +176,23 @@ static int smacker_read_header(AVFormatContext *s, AVFormatParameters *ap)
     /* handle possible audio streams */
     for(i = 0; i < 7; i++) {
         smk->indexes[i] = -1;
-        if(smk->rates[i] & 0xFFFFFF){
+        if (smk->rates[i]) {
             ast[i] = av_new_stream(s, 0);
             smk->indexes[i] = ast[i]->index;
             ast[i]->codec->codec_type = AVMEDIA_TYPE_AUDIO;
-            if (smk->rates[i] & SMK_AUD_BINKAUD) {
+            if (smk->aflags[i] & SMK_AUD_BINKAUD) {
                 ast[i]->codec->codec_id = CODEC_ID_BINKAUDIO_RDFT;
-            } else if (smk->rates[i] & SMK_AUD_USEDCT) {
+            } else if (smk->aflags[i] & SMK_AUD_USEDCT) {
                 ast[i]->codec->codec_id = CODEC_ID_BINKAUDIO_DCT;
-            } else if (smk->rates[i] & SMK_AUD_PACKED){
+            } else if (smk->aflags[i] & SMK_AUD_PACKED){
                 ast[i]->codec->codec_id = CODEC_ID_SMACKAUDIO;
                 ast[i]->codec->codec_tag = MKTAG('S', 'M', 'K', 'A');
             } else {
                 ast[i]->codec->codec_id = CODEC_ID_PCM_U8;
             }
-            ast[i]->codec->channels = (smk->rates[i] & SMK_AUD_STEREO) ? 2 : 1;
-            ast[i]->codec->sample_rate = smk->rates[i] & 0xFFFFFF;
-            ast[i]->codec->bits_per_coded_sample = (smk->rates[i] & SMK_AUD_16BITS) ? 16 : 8;
+            ast[i]->codec->channels = (smk->aflags[i] & SMK_AUD_STEREO) ? 2 : 1;
+            ast[i]->codec->sample_rate = smk->rates[i];
+            ast[i]->codec->bits_per_coded_sample = (smk->aflags[i] & SMK_AUD_16BITS) ? 16 : 8;
             if(ast[i]->codec->bits_per_coded_sample == 16 && ast[i]->codec->codec_id == CODEC_ID_PCM_U8)
                 ast[i]->codec->codec_id = CODEC_ID_PCM_S16LE;
             av_set_pts_info(ast[i], 64, 1, ast[i]->codec->sample_rate
-- 
cgit v1.1


From e190e453bd1e4d4b409ed3556b3a50d1087c15d7 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 21 Sep 2011 11:37:51 -0400
Subject: smacker: validate number of channels

---
 libavcodec/smacker.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index 496bbb5..f3dec7f 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -559,6 +559,10 @@ static av_cold int decode_end(AVCodecContext *avctx)
 
 static av_cold int smka_decode_init(AVCodecContext *avctx)
 {
+    if (avctx->channels < 1 || avctx->channels > 2) {
+        av_log(avctx, AV_LOG_ERROR, "invalid number of channels\n");
+        return AVERROR(EINVAL);
+    }
     avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
     avctx->sample_fmt = avctx->bits_per_coded_sample == 8 ? AV_SAMPLE_FMT_U8 : AV_SAMPLE_FMT_S16;
     return 0;
-- 
cgit v1.1


From cf044f8bff0d28dbc34492f18b0d18b3ba8bad9d Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 21 Sep 2011 11:42:55 -0400
Subject: smacker: check buffer size before reading output size

---
 libavcodec/smacker.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index f3dec7f..20ddc15 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -586,6 +586,11 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     int bits, stereo;
     int pred[2] = {0, 0};
 
+    if (buf_size <= 4) {
+        av_log(avctx, AV_LOG_ERROR, "packet is too small\n");
+        return AVERROR(EINVAL);
+    }
+
     unp_size = AV_RL32(buf);
 
     init_get_bits(&gb, buf + 4, (buf_size - 4) * 8);
-- 
cgit v1.1


From ff1f89de2da3472d133e2c95bf7c9ad2d88df33d Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 21 Sep 2011 11:49:33 -0400
Subject: smacker: validate channels and sample format.

---
 libavcodec/smacker.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index 20ddc15..d8a3d77 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -606,6 +606,14 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
         av_log(avctx, AV_LOG_ERROR, "Frame is too large to fit in buffer\n");
         return -1;
     }
+    if (stereo ^ (avctx->channels != 1)) {
+        av_log(avctx, AV_LOG_ERROR, "channels mismatch\n");
+        return AVERROR(EINVAL);
+    }
+    if (bits && avctx->sample_fmt == AV_SAMPLE_FMT_U8) {
+        av_log(avctx, AV_LOG_ERROR, "sample format mismatch\n");
+        return AVERROR(EINVAL);
+    }
 
     memset(vlc, 0, sizeof(VLC) * 4);
     memset(h, 0, sizeof(HuffContext) * 4);
-- 
cgit v1.1


From f9a9c8f9bc88cd34d2393311055596e649e868fb Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 28 Sep 2011 14:47:31 +0200
Subject: flashsv: fix typo in av_log() message

---
 libavcodec/flashsv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c
index 57d33c0..b1424a5 100644
--- a/libavcodec/flashsv.c
+++ b/libavcodec/flashsv.c
@@ -300,7 +300,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
     /* check for changes of image width and image height */
     if (avctx->width != s->image_width || avctx->height != s->image_height) {
         av_log(avctx, AV_LOG_ERROR,
-               "Frame width or height differs from first frames!\n");
+               "Frame width or height differs from first frame!\n");
         av_log(avctx, AV_LOG_ERROR, "fh = %d, fv %d  vs  ch = %d, cv = %d\n",
                avctx->height, avctx->width, s->image_height, s->image_width);
         return AVERROR_INVALIDDATA;
-- 
cgit v1.1


From 4b7f49082f80cf92475c999f8c42e112a13aed6a Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Wed, 28 Sep 2011 14:47:58 +0200
Subject: flashsv: return more meaningful error value

---
 libavcodec/flashsv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c
index b1424a5..c99c21c 100644
--- a/libavcodec/flashsv.c
+++ b/libavcodec/flashsv.c
@@ -366,7 +366,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
                 if (s->color_depth != 0 && s->color_depth != 2) {
                     av_log(avctx, AV_LOG_ERROR,
                            "%dx%d invalid color depth %d\n", i, j, s->color_depth);
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 }
 
                 if (has_diff) {
-- 
cgit v1.1


From 25c27f379faaf75479111f451a78ac6da71a6e0c Mon Sep 17 00:00:00 2001
From: Diego Biurrun <diego@biurrun.de>
Date: Thu, 29 Sep 2011 21:57:29 +0200
Subject: doc: fix references to obsolete presets directories for avconv/ffmpeg

---
 cmdutils.h      | 2 +-
 doc/ffmpeg.texi | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cmdutils.h b/cmdutils.h
index a20b779..1c17433 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -334,7 +334,7 @@ int64_t guess_correct_pts(PtsCorrectionContext *ctx, int64_t pts, int64_t dts);
  *
  * If is_path is non-zero, look for the file in the path preset_name.
  * Otherwise search for a file named arg.ffpreset in the directories
- * $FFMPEG_DATADIR (if set), $HOME/.ffmpeg, and in the datadir defined
+ * $AVCONV_DATADIR (if set), $HOME/.avconv, and in the datadir defined
  * at configuration time, in that order. If no such file is found and
  * codec_name is defined, then search for a file named
  * codec_name-preset_name.ffpreset in the above-mentioned directories.
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 2457d65..22989c0 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -806,8 +806,8 @@ preset options identifies the preset file to use according to the
 following rules:
 
 First ffmpeg searches for a file named @var{arg}.ffpreset in the
-directories @file{$FFMPEG_DATADIR} (if set), and @file{$HOME/.ffmpeg}, and in
-the datadir defined at configuration time (usually @file{PREFIX/share/ffmpeg})
+directories @file{$AVCONV_DATADIR} (if set), and @file{$HOME/.avconv}, and in
+the datadir defined at configuration time (usually @file{PREFIX/share/avconv})
 in that order. For example, if the argument is @code{libx264-max}, it will
 search for the file @file{libx264-max.ffpreset}.
 
-- 
cgit v1.1


From dff4177546ef78ef7f3a133cf27ecd72b693181d Mon Sep 17 00:00:00 2001
From: Raivo Hool <raivo.hool@gmail.com>
Date: Tue, 11 Oct 2011 10:53:39 +0300
Subject: mov: fix disc/track numbers and totals

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/mov.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 3190afd..f6f95c2 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -81,15 +81,20 @@ typedef struct MOVParseTableEntry {
 
 static const MOVParseTableEntry mov_default_parse_table[];
 
-static int mov_metadata_trkn(MOVContext *c, AVIOContext *pb, unsigned len)
+static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb,
+                                             unsigned len, const char *key)
 {
     char buf[16];
 
+    short current, total;
     avio_rb16(pb); // unknown
-    snprintf(buf, sizeof(buf), "%d", avio_rb16(pb));
-    av_dict_set(&c->fc->metadata, "track", buf, 0);
-
-    avio_rb16(pb); // total tracks
+    current = avio_rb16(pb);
+    total = avio_rb16(pb);
+    if (!total)
+        snprintf(buf, sizeof(buf), "%d", current);
+    else
+        snprintf(buf, sizeof(buf), "%d/%d", current, total);
+    av_dict_set(&c->fc->metadata, key, buf, 0);
 
     return 0;
 }
@@ -140,7 +145,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     const char *key = NULL;
     uint16_t str_size, langcode = 0;
     uint32_t data_type = 0;
-    int (*parse)(MOVContext*, AVIOContext*, unsigned) = NULL;
+    int (*parse)(MOVContext*, AVIOContext*, unsigned, const char*) = NULL;
 
     switch (atom.type) {
     case MKTAG(0xa9,'n','a','m'): key = "title";     break;
@@ -163,7 +168,9 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     case MKTAG( 't','v','e','n'): key = "episode_id";break;
     case MKTAG( 't','v','n','n'): key = "network";   break;
     case MKTAG( 't','r','k','n'): key = "track";
-        parse = mov_metadata_trkn; break;
+        parse = mov_metadata_track_or_disc_number; break;
+    case MKTAG( 'd','i','s','k'): key = "disc";
+        parse = mov_metadata_track_or_disc_number; break;
     }
 
     if (c->itunes_metadata && atom.size > 8) {
@@ -198,7 +205,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     str_size = FFMIN3(sizeof(str)-1, str_size, atom.size);
 
     if (parse)
-        parse(c, pb, str_size);
+        parse(c, pb, str_size, key);
     else {
         if (data_type == 3 || (data_type == 0 && langcode < 0x800)) { // MAC Encoded
             mov_read_mac_string(c, pb, str_size, str, sizeof(str));
-- 
cgit v1.1


From 2804d320756dfd1d1927299f1c962699f4a39293 Mon Sep 17 00:00:00 2001
From: Raivo Hool <raivo.hool@gmail.com>
Date: Tue, 11 Oct 2011 10:53:40 +0300
Subject: mov: read album_artist atom

Signed-off-by: Anton Khirnov <anton@khirnov.net>
---
 libavformat/mov.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index f6f95c2..9661149 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -151,6 +151,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     case MKTAG(0xa9,'n','a','m'): key = "title";     break;
     case MKTAG(0xa9,'a','u','t'):
     case MKTAG(0xa9,'A','R','T'): key = "artist";    break;
+    case MKTAG( 'a','A','R','T'): key = "album_artist";    break;
     case MKTAG(0xa9,'w','r','t'): key = "composer";  break;
     case MKTAG( 'c','p','r','t'):
     case MKTAG(0xa9,'c','p','y'): key = "copyright"; break;
-- 
cgit v1.1


From c780b543e72141393ae3c0b0cb2654f9a5e35f73 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Fri, 7 Oct 2011 20:09:56 +0200
Subject: id3v2: fix NULL pointer dereference

Bug found by Laurent Aimar fenrir at videolan org
---
 libavformat/id3v2.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index c30ab4c..cea0ee0 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -351,7 +351,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
     AVIOContext *pbx;
     unsigned char *buffer = NULL;
     int buffer_size = 0;
-    void (*extra_func)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta**) = NULL;
+    const ID3v2EMFunc *extra_func;
 
     switch (version) {
     case 2:
@@ -419,7 +419,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
             av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
             avio_skip(s->pb, tlen);
         /* check for text tag or supported special meta tag */
-        } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)->read))) {
+        } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)))) {
             if (unsync || tunsync) {
                 int i, j;
                 av_fast_malloc(&buffer, &buffer_size, tlen);
@@ -445,7 +445,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
                 read_ttag(s, pbx, tlen, tag);
             else
                 /* parse special meta tag */
-                extra_func(s, pbx, tlen, tag, extra_meta);
+                extra_func->read(s, pbx, tlen, tag, extra_meta);
         }
         else if (!tag[0]) {
             if (tag[1])
@@ -508,11 +508,11 @@ void ff_id3v2_read(AVFormatContext *s, const char *magic)
 void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta)
 {
     ID3v2ExtraMeta *current = *extra_meta, *next;
-    void (*free_func)(ID3v2ExtraMeta*);
+    const ID3v2EMFunc *extra_func;
 
     while (current) {
-        if ((free_func = get_extra_meta_func(current->tag, 1)->free))
-            free_func(current->data);
+        if ((extra_func = get_extra_meta_func(current->tag, 1)))
+            extra_func->free(current->data);
         next = current->next;
         av_freep(&current);
         current = next;
-- 
cgit v1.1


From 5a7ba58657bda249ac625456577651ab98a9d231 Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Tue, 11 Oct 2011 10:12:38 +0200
Subject: mov: cosmetics, fix for and if spacing

---
 libavformat/mov.c | 96 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 50 insertions(+), 46 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 9661149..4f5bb0c 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -271,7 +271,7 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         int (*parse)(MOVContext*, AVIOContext*, MOVAtom) = NULL;
         a.size = atom.size;
         a.type=0;
-        if(atom.size >= 8) {
+        if (atom.size >= 8) {
             a.size = avio_rb32(pb);
             a.type = avio_rl32(pb);
         }
@@ -288,7 +288,7 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                 break;
         }
         a.size -= 8;
-        if(a.size < 0)
+        if (a.size < 0)
             break;
         a.size = FFMIN(a.size, atom.size - total_size);
 
@@ -455,11 +455,11 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     if     (type == MKTAG('v','i','d','e'))
         st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
-    else if(type == MKTAG('s','o','u','n'))
+    else if (type == MKTAG('s','o','u','n'))
         st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
-    else if(type == MKTAG('m','1','a',' '))
+    else if (type == MKTAG('m','1','a',' '))
         st->codec->codec_id = CODEC_ID_MP2;
-    else if((type == MKTAG('s','u','b','p')) || (type == MKTAG('c','l','c','p')))
+    else if ((type == MKTAG('s','u','b','p')) || (type == MKTAG('c','l','c','p')))
         st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
 
     avio_rb32(pb); /* component  manufacture */
@@ -556,7 +556,7 @@ static int mov_read_pasp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 /* this atom contains actual media data */
 static int mov_read_mdat(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
-    if(atom.size == 0) /* wrong one (MP4) */
+    if (atom.size == 0) /* wrong one (MP4) */
         return 0;
     c->found_mdat=1;
     return 0; /* now go for moov */
@@ -710,7 +710,7 @@ static int mov_read_smi(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st = c->fc->streams[c->fc->nb_streams-1];
 
-    if((uint64_t)atom.size > (1<<30))
+    if ((uint64_t)atom.size > (1<<30))
         return -1;
 
     // currently SVQ3 decoder expect full STSD header - so let's fake it
@@ -769,10 +769,10 @@ static int mov_read_extradata(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st= c->fc->streams[c->fc->nb_streams-1];
     size= (uint64_t)st->codec->extradata_size + atom.size + 8 + FF_INPUT_BUFFER_PADDING_SIZE;
-    if(size > INT_MAX || (uint64_t)atom.size > INT_MAX)
+    if (size > INT_MAX || (uint64_t)atom.size > INT_MAX)
         return -1;
     buf= av_realloc(st->codec->extradata, size);
-    if(!buf)
+    if (!buf)
         return -1;
     st->codec->extradata= buf;
     buf+= st->codec->extradata_size;
@@ -791,7 +791,7 @@ static int mov_read_wave(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st = c->fc->streams[c->fc->nb_streams-1];
 
-    if((uint64_t)atom.size > (1<<30))
+    if ((uint64_t)atom.size > (1<<30))
         return -1;
 
     if (st->codec->codec_id == CODEC_ID_QDM2 || st->codec->codec_id == CODEC_ID_QDMC) {
@@ -822,7 +822,7 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st = c->fc->streams[c->fc->nb_streams-1];
 
-    if((uint64_t)atom.size > (1<<30))
+    if ((uint64_t)atom.size > (1<<30))
         return -1;
 
     av_free(st->codec->extradata);
@@ -849,7 +849,7 @@ static int mov_read_strf(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st = c->fc->streams[c->fc->nb_streams-1];
 
-    if((uint64_t)atom.size > (1<<30))
+    if ((uint64_t)atom.size > (1<<30))
         return -1;
 
     av_free(st->codec->extradata);
@@ -878,7 +878,7 @@ static int mov_read_stco(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     entries = avio_rb32(pb);
 
-    if(entries >= UINT_MAX/sizeof(int64_t))
+    if (entries >= UINT_MAX/sizeof(int64_t))
         return -1;
 
     sc->chunk_offsets = av_malloc(entries * sizeof(int64_t));
@@ -887,10 +887,10 @@ static int mov_read_stco(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     sc->chunk_count = entries;
 
     if      (atom.type == MKTAG('s','t','c','o'))
-        for(i=0; i<entries; i++)
+        for (i=0; i<entries; i++)
             sc->chunk_offsets[i] = avio_rb32(pb);
     else if (atom.type == MKTAG('c','o','6','4'))
-        for(i=0; i<entries; i++)
+        for (i=0; i<entries; i++)
             sc->chunk_offsets[i] = avio_rb64(pb);
     else
         return -1;
@@ -944,7 +944,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
     st = c->fc->streams[c->fc->nb_streams-1];
     sc = st->priv_data;
 
-    for(pseudo_stream_id=0; pseudo_stream_id<entries; pseudo_stream_id++) {
+    for (pseudo_stream_id=0; pseudo_stream_id<entries; pseudo_stream_id++) {
         //Parsing Sample description table
         enum CodecID id;
         int dref_id = 1;
@@ -992,9 +992,9 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
                 id = ff_codec_get_id(ff_codec_bmp_tags, format);
             if (id > 0)
                 st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
-            else if(st->codec->codec_type == AVMEDIA_TYPE_DATA){
+            else if (st->codec->codec_type == AVMEDIA_TYPE_DATA){
                 id = ff_codec_get_id(ff_codec_movsubtitle_tags, format);
-                if(id > 0)
+                if (id > 0)
                     st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
             }
         }
@@ -1003,7 +1003,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
                 (format >> 0) & 0xff, (format >> 8) & 0xff, (format >> 16) & 0xff,
                 (format >> 24) & 0xff, st->codec->codec_type);
 
-        if(st->codec->codec_type==AVMEDIA_TYPE_VIDEO) {
+        if (st->codec->codec_type==AVMEDIA_TYPE_VIDEO) {
             unsigned int color_depth, len;
             int color_greyscale;
 
@@ -1106,7 +1106,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
                 }
                 sc->has_palette = 1;
             }
-        } else if(st->codec->codec_type==AVMEDIA_TYPE_AUDIO) {
+        } else if (st->codec->codec_type==AVMEDIA_TYPE_AUDIO) {
             int bits_per_sample, flags;
             uint16_t version = avio_rb16(pb);
 
@@ -1125,13 +1125,13 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
 
             //Read QT version 1 fields. In version 0 these do not exist.
             av_dlog(c->fc, "version =%d, isom =%d\n",version,c->isom);
-            if(!c->isom) {
-                if(version==1) {
+            if (!c->isom) {
+                if (version==1) {
                     sc->samples_per_frame = avio_rb32(pb);
                     avio_rb32(pb); /* bytes per packet */
                     sc->bytes_per_frame = avio_rb32(pb);
                     avio_rb32(pb); /* bytes per sample */
-                } else if(version==2) {
+                } else if (version==2) {
                     avio_rb32(pb); /* sizeof struct only */
                     st->codec->sample_rate = av_int2dbl(avio_rb64(pb)); /* float 64 */
                     st->codec->channels = avio_rb32(pb);
@@ -1186,7 +1186,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
                 st->codec->bits_per_coded_sample = bits_per_sample;
                 sc->sample_size = (bits_per_sample >> 3) * st->codec->channels;
             }
-        } else if(st->codec->codec_type==AVMEDIA_TYPE_SUBTITLE){
+        } else if (st->codec->codec_type==AVMEDIA_TYPE_SUBTITLE){
             // ttxt stsd contains display flags, justification, background
             // color, fonts, and default styles, so fake an atom to read it
             MOVAtom fake_atom = { .size = size - (avio_tell(pb) - start_pos) };
@@ -1208,7 +1208,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
             avio_skip(pb, a.size);
     }
 
-    if(st->codec->codec_type==AVMEDIA_TYPE_AUDIO && st->codec->sample_rate==0 && sc->time_scale>1)
+    if (st->codec->codec_type==AVMEDIA_TYPE_AUDIO && st->codec->sample_rate==0 && sc->time_scale>1)
         st->codec->sample_rate= sc->time_scale;
 
     /* special codec parameters handling */
@@ -1297,14 +1297,14 @@ static int mov_read_stsc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     av_dlog(c->fc, "track[%i].stsc.entries = %i\n", c->fc->nb_streams-1, entries);
 
-    if(entries >= UINT_MAX / sizeof(*sc->stsc_data))
+    if (entries >= UINT_MAX / sizeof(*sc->stsc_data))
         return -1;
     sc->stsc_data = av_malloc(entries * sizeof(*sc->stsc_data));
     if (!sc->stsc_data)
         return AVERROR(ENOMEM);
     sc->stsc_count = entries;
 
-    for(i=0; i<entries; i++) {
+    for (i=0; i<entries; i++) {
         sc->stsc_data[i].first = avio_rb32(pb);
         sc->stsc_data[i].count = avio_rb32(pb);
         sc->stsc_data[i].id = avio_rb32(pb);
@@ -1359,14 +1359,14 @@ static int mov_read_stss(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     av_dlog(c->fc, "keyframe_count = %d\n", entries);
 
-    if(entries >= UINT_MAX / sizeof(int))
+    if (entries >= UINT_MAX / sizeof(int))
         return -1;
     sc->keyframes = av_malloc(entries * sizeof(int));
     if (!sc->keyframes)
         return AVERROR(ENOMEM);
     sc->keyframe_count = entries;
 
-    for(i=0; i<entries; i++) {
+    for (i=0; i<entries; i++) {
         sc->keyframes[i] = avio_rb32(pb);
         //av_dlog(c->fc, "keyframes[]=%d\n", sc->keyframes[i]);
     }
@@ -1434,7 +1434,7 @@ static int mov_read_stsz(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     init_get_bits(&gb, buf, 8*num_bytes);
 
-    for(i=0; i<entries; i++)
+    for (i=0; i<entries; i++)
         sc->sample_sizes[i] = get_bits_long(&gb, field_size);
 
     av_free(buf);
@@ -1458,16 +1458,19 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     avio_rb24(pb); /* flags */
     entries = avio_rb32(pb);
 
-    av_dlog(c->fc, "track[%i].stts.entries = %i\n", c->fc->nb_streams-1, entries);
+    av_dlog(c->fc, "track[%i].stts.entries = %i\n",
+            c->fc->nb_streams-1, entries);
 
-    if(entries >= UINT_MAX / sizeof(*sc->stts_data))
+    if (entries >= UINT_MAX / sizeof(*sc->stts_data))
         return -1;
+
     sc->stts_data = av_malloc(entries * sizeof(*sc->stts_data));
     if (!sc->stts_data)
         return AVERROR(ENOMEM);
+
     sc->stts_count = entries;
 
-    for(i=0; i<entries; i++) {
+    for (i=0; i<entries; i++) {
         int sample_duration;
         int sample_count;
 
@@ -1476,14 +1479,15 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         sc->stts_data[i].count= sample_count;
         sc->stts_data[i].duration= sample_duration;
 
-        av_dlog(c->fc, "sample_count=%d, sample_duration=%d\n",sample_count,sample_duration);
+        av_dlog(c->fc, "sample_count=%d, sample_duration=%d\n",
+                sample_count, sample_duration);
 
         duration+=(int64_t)sample_duration*sample_count;
         total_sample_count+=sample_count;
     }
 
     st->nb_frames= total_sample_count;
-    if(duration)
+    if (duration)
         st->duration= duration;
     return 0;
 }
@@ -1505,14 +1509,14 @@ static int mov_read_ctts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     av_dlog(c->fc, "track[%i].ctts.entries = %i\n", c->fc->nb_streams-1, entries);
 
-    if(entries >= UINT_MAX / sizeof(*sc->ctts_data))
+    if (entries >= UINT_MAX / sizeof(*sc->ctts_data))
         return -1;
     sc->ctts_data = av_malloc(entries * sizeof(*sc->ctts_data));
     if (!sc->ctts_data)
         return AVERROR(ENOMEM);
     sc->ctts_count = entries;
 
-    for(i=0; i<entries; i++) {
+    for (i=0; i<entries; i++) {
         int count    =avio_rb32(pb);
         int duration =avio_rb32(pb);
 
@@ -1595,7 +1599,7 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
                 if (keyframe)
                     distance = 0;
                 sample_size = sc->sample_size > 0 ? sc->sample_size : sc->sample_sizes[current_sample];
-                if(sc->pseudo_stream_id == -1 ||
+                if (sc->pseudo_stream_id == -1 ||
                    sc->stsc_data[stsc_index].id - 1 == sc->pseudo_stream_id) {
                     AVIndexEntry *e = &st->index_entries[st->nb_index_entries++];
                     e->pos = current_offset;
@@ -2160,9 +2164,9 @@ static int mov_read_cmov(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return AVERROR(ENOMEM);
     }
     avio_read(pb, cmov_data, cmov_len);
-    if(uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, cmov_len) != Z_OK)
+    if (uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, cmov_len) != Z_OK)
         goto free_and_return;
-    if(ffio_init_context(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, NULL) != 0)
+    if (ffio_init_context(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, NULL) != 0)
         goto free_and_return;
     atom.type = MKTAG('m','o','o','v');
     atom.size = moov_len;
@@ -2191,10 +2195,10 @@ static int mov_read_elst(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     avio_rb24(pb); /* flags */
     edit_count = avio_rb32(pb); /* entries */
 
-    if((uint64_t)edit_count*12+8 > atom.size)
+    if ((uint64_t)edit_count*12+8 > atom.size)
         return -1;
 
-    for(i=0; i<edit_count; i++){
+    for (i=0; i<edit_count; i++){
         int64_t time;
         int64_t duration;
         if (version == 1) {
@@ -2210,7 +2214,7 @@ static int mov_read_elst(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         }
     }
 
-    if(edit_count > 1)
+    if (edit_count > 1)
         av_log(c->fc, AV_LOG_WARNING, "multiple edit list entries, "
                "a/v desync might occur, patch welcome\n");
 
@@ -2283,7 +2287,7 @@ static int mov_probe(AVProbeData *p)
 
     /* check file header */
     offset = 0;
-    for(;;) {
+    for (;;) {
         /* ignore invalid offset */
         if ((offset + 8) > (unsigned int)p->buf_size)
             return score;
@@ -2391,7 +2395,7 @@ static int mov_read_header(AVFormatContext *s, AVFormatParameters *ap)
 
     mov->fc = s;
     /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */
-    if(pb->seekable)
+    if (pb->seekable)
         atom.size = avio_size(pb);
     else
         atom.size = INT64_MAX;
@@ -2602,7 +2606,7 @@ static int mov_read_close(AVFormatContext *s)
     }
 
     if (mov->dv_demux) {
-        for(i = 0; i < mov->dv_fctx->nb_streams; i++) {
+        for (i = 0; i < mov->dv_fctx->nb_streams; i++) {
             av_freep(&mov->dv_fctx->streams[i]->codec);
             av_freep(&mov->dv_fctx->streams[i]);
         }
-- 
cgit v1.1


From 30c3d976f12665d5d13971172aab062a97cb1bce Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Tue, 11 Oct 2011 10:14:06 +0200
Subject: mov: do not misreport empty stts

Return AVERROR(EINVAL) instead of ENOMEM if entries is 0.
Fixes an av_malloc(0) crash on Mac OS X.
---
 libavformat/mov.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index 4f5bb0c..0e2ad1f 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -1461,8 +1461,8 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     av_dlog(c->fc, "track[%i].stts.entries = %i\n",
             c->fc->nb_streams-1, entries);
 
-    if (entries >= UINT_MAX / sizeof(*sc->stts_data))
-        return -1;
+    if (!entries || entries >= UINT_MAX / sizeof(*sc->stts_data))
+        return AVERROR(EINVAL);
 
     sc->stts_data = av_malloc(entries * sizeof(*sc->stts_data));
     if (!sc->stts_data)
-- 
cgit v1.1


From 88d1e2b2b0a129365a62efd666db0394e8ffbe08 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 8 Oct 2011 02:16:29 +0100
Subject: intfloat_readwrite: fix signed addition overflows

These additions might overflow the signed range for large
input values.  Converting to unsigned before the addition
rather than after avoids such undefined behaviour.  The
result under normal two's complement wraparound remains
unchanged.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavutil/intfloat_readwrite.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavutil/intfloat_readwrite.c b/libavutil/intfloat_readwrite.c
index 21a1c31..4c8de7b 100644
--- a/libavutil/intfloat_readwrite.c
+++ b/libavutil/intfloat_readwrite.c
@@ -30,13 +30,13 @@
 #include "intfloat_readwrite.h"
 
 double av_int2dbl(int64_t v){
-    if(v+v > 0xFFEULL<<52)
+    if((uint64_t)v+v > 0xFFEULL<<52)
         return NAN;
     return ldexp(((v&((1LL<<52)-1)) + (1LL<<52)) * (v>>63|1), (v>>52&0x7FF)-1075);
 }
 
 float av_int2flt(int32_t v){
-    if(v+v > 0xFF000000U)
+    if((uint32_t)v+v > 0xFF000000U)
         return NAN;
     return ldexp(((v&0x7FFFFF) + (1<<23)) * (v>>31|1), (v>>23&0xFF)-150);
 }
-- 
cgit v1.1


From 8babfc033ecb6332155c1f8879e54dee41d16952 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 11 Oct 2011 12:58:31 +0100
Subject: h264: fix invalid shifts in init_cavlc_level_tab()

The level_code expression includes a shift which is invalid in
those cases where the value is not used.  Moving the calculation
to the branch where the result is used avoids these.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/h264_cavlc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 9e7c182..ca7b939 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -238,17 +238,18 @@ static inline int pred_non_zero_count(H264Context *h, int n){
 }
 
 static av_cold void init_cavlc_level_tab(void){
-    int suffix_length, mask;
+    int suffix_length;
     unsigned int i;
 
     for(suffix_length=0; suffix_length<7; suffix_length++){
         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
-            int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
 
-            mask= -(level_code&1);
-            level_code= (((2+level_code)>>1) ^ mask) - mask;
             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
+                int level_code = (prefix << suffix_length) +
+                    (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
+                int mask = -(level_code&1);
+                level_code = (((2 + level_code) >> 1) ^ mask) - mask;
                 cavlc_level_tab[suffix_length][i][0]= level_code;
                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
             }else if(prefix + 1 <= LEVEL_TAB_BITS){
-- 
cgit v1.1


From 92fb52d9060a146f31da6f07ea9ce7867294e153 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 6 Oct 2011 08:03:38 -0700
Subject: prores: extract idct into its own dspcontext and merge with
 put_pixels.

---
 libavcodec/Makefile      |  2 +-
 libavcodec/dsputil.c     | 65 +++++++++++++++++++++----------------
 libavcodec/dsputil.h     |  2 ++
 libavcodec/proresdec.c   | 84 +++++++++++-------------------------------------
 libavcodec/proresdsp.c   | 61 +++++++++++++++++++++++++++++++++++
 libavcodec/proresdsp.h   | 38 ++++++++++++++++++++++
 libavcodec/simple_idct.c | 17 ++++++++++
 libavcodec/simple_idct.h |  6 ++++
 8 files changed, 180 insertions(+), 95 deletions(-)
 create mode 100644 libavcodec/proresdsp.c
 create mode 100644 libavcodec/proresdsp.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3c4e2f8..b7b5124 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o
 OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
-OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o
+OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdsp.o
 OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
 OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
                                           celp_filters.o acelp_vectors.o \
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 967406e..182063c 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
     }
 }
 
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                   int idct_permutation_type)
+{
+    int i;
+
+    switch(idct_permutation_type){
+    case FF_NO_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= i;
+        break;
+    case FF_LIBMPEG2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+        break;
+    case FF_SIMPLE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= simple_mmx_permutation[i];
+        break;
+    case FF_TRANSPOSE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= ((i&7)<<3) | (i>>3);
+        break;
+    case FF_PARTTRANS_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
+        break;
+    case FF_SSE2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
+        break;
+    default:
+        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
+    }
+}
+
 static int pix_sum_c(uint8_t * pix, int line_size)
 {
     int s, i, j;
@@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
             c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
     }
 
-    switch(c->idct_permutation_type){
-    case FF_NO_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= i;
-        break;
-    case FF_LIBMPEG2_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
-        break;
-    case FF_SIMPLE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= simple_mmx_permutation[i];
-        break;
-    case FF_TRANSPOSE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
-        break;
-    case FF_PARTTRANS_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
-        break;
-    case FF_SSE2_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
-        break;
-    default:
-        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
-    }
+    ff_init_scantable_permutation(c->idct_permutation,
+                                  c->idct_permutation_type);
 }
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 73830f8..bef2cdd 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -202,6 +202,8 @@ typedef struct ScanTable{
 } ScanTable;
 
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                   int idct_permutation_type);
 
 #define EMULATED_EDGE(depth) \
 void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index c70d145..cbbd068 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -34,17 +34,11 @@
 
 #include "libavutil/intmath.h"
 #include "avcodec.h"
-#include "dsputil.h"
+#include "proresdsp.h"
 #include "get_bits.h"
 
-#define BITS_PER_SAMPLE 10                              ///< output precision of that decoder
-#define BIAS     (1 << (BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
-#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
-#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
-
-
 typedef struct {
-    DSPContext dsp;
+    ProresDSPContext dsp;
     AVFrame    picture;
     ScanTable  scantable;
     int        scantable_type;           ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
@@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
 
-    avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
-    dsputil_init(&ctx->dsp, avctx);
+    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
+    ff_proresdsp_init(&ctx->dsp);
 
     avctx->coded_frame = &ctx->picture;
     avcodec_get_frame_defaults(&ctx->picture);
@@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
 }
 
 
-#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
-
-/**
- * Add bias value, clamp and output pixels of a slice
- */
-static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
-                       int mbs_per_slice, int blocks_per_mb)
-{
-    int mb, x, y, src_offset, dst_offset;
-    const DCTELEM *src1, *src2;
-    uint16_t *dst1, *dst2;
-
-    src1 = in;
-    src2 = in + (blocks_per_mb << 5);
-    dst1 = out;
-    dst2 = out + (stride << 3);
-
-    for (mb = 0; mb < mbs_per_slice; mb++) {
-        for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
-            for (x = 0; x < 8; x++) {
-                src_offset = (y << 3) + x;
-
-                dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
-                dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
-
-                if (blocks_per_mb > 2) {
-                    dst1[dst_offset + x + 8] =
-                        CLIP_AND_BIAS(src1[src_offset + 64]);
-                    dst2[dst_offset + x + 8] =
-                        CLIP_AND_BIAS(src2[src_offset + 64]);
-                }
-            }
-        }
-
-        src1 += blocks_per_mb << 6;
-        src2 += blocks_per_mb << 6;
-        dst1 += blocks_per_mb << 2;
-        dst2 += blocks_per_mb << 2;
-    }
-}
-
-
 /**
  * Decode a slice plane (luma or chroma).
  */
@@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
 {
     GetBitContext gb;
     DCTELEM *block_ptr;
-    int i, blk_num, blocks_per_slice;
+    int mb_num, blocks_per_slice;
 
     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 
@@ -518,20 +470,20 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
     /* inverse quantization, inverse transform and output */
     block_ptr = ctx->blocks;
 
-    for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 64) {
-        /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1
-         * and the input of the inverse transform should be scaled by 2
-         * in order to avoid rounding errors.
-         * Due to the fact the existing Libav transforms are incompatible with
-         * that input I temporally introduced the coarse solution below... */
-        for (i = 0; i < 64; i++)
-            block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
-
-        ctx->dsp.idct(block_ptr);
+    for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
+        ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr, qmat);
+        block_ptr += 64;
+        if (blocks_per_mb > 2) {
+            ctx->dsp.idct_put(out_ptr + 8,            linesize, block_ptr, qmat);
+            block_ptr += 64;
+        }
+        ctx->dsp.idct_put(out_ptr + linesize * 4,     linesize, block_ptr, qmat);
+        block_ptr += 64;
+        if (blocks_per_mb > 2) {
+            ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat);
+            block_ptr += 64;
+        }
     }
-
-    put_pixels(ctx->blocks, out_ptr, linesize >> 1, mbs_per_slice,
-               blocks_per_mb);
 }
 
 
diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
new file mode 100644
index 0000000..3038176
--- /dev/null
+++ b/libavcodec/proresdsp.c
@@ -0,0 +1,61 @@
+/*
+ * Apple ProRes compatible decoder
+ *
+ * Copyright (c) 2010-2011 Maxim Poliakovski
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "proresdsp.h"
+#include "simple_idct.h"
+
+#define BIAS     (1 << (PRORES_BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
+#define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
+#define CLIP_MAX (1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
+
+#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
+
+/**
+ * Add bias value, clamp and output pixels of one 8x8 block
+ */
+static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in)
+{
+    int x, y, src_offset, dst_offset;
+
+    for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
+        for (x = 0; x < 8; x++) {
+            src_offset = (y << 3) + x;
+
+            dst[dst_offset + x] = CLIP_AND_BIAS(in[src_offset]);
+        }
+    }
+}
+
+static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat)
+{
+    ff_prores_idct(block, qmat);
+    put_pixels(out, linesize >> 1, block);
+}
+
+void ff_proresdsp_init(ProresDSPContext *dsp)
+{
+    dsp->idct_put = prores_idct_put_c;
+    dsp->idct_permutation_type = FF_NO_IDCT_PERM;
+
+    ff_init_scantable_permutation(dsp->idct_permutation,
+                                  dsp->idct_permutation_type);
+}
diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h
new file mode 100644
index 0000000..18d6bf5
--- /dev/null
+++ b/libavcodec/proresdsp.h
@@ -0,0 +1,38 @@
+/*
+ * Apple ProRes compatible decoder
+ *
+ * Copyright (c) 2010-2011 Maxim Poliakovski
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PRORESDSP_H
+#define AVCODEC_PRORESDSP_H
+
+#include "dsputil.h"
+
+#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder
+
+typedef struct {
+    int idct_permutation_type;
+    uint8_t idct_permutation[64];
+    void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat);
+} ProresDSPContext;
+
+void ff_proresdsp_init(ProresDSPContext *dsp);
+
+#endif /* AVCODEC_PRORESDSP_H */
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index b62658b..c6cd908 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -221,3 +221,20 @@ void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
         idct4col_add(dest + i, line_size, block + i);
     }
 }
+
+void ff_prores_idct(DCTELEM *block, const int16_t *qmat)
+{
+    int i;
+
+    for (i = 0; i < 64; i++)
+        block[i] *= qmat[i];
+
+    for (i = 0; i < 8; i++)
+        idctRowCondDC_10(block + i*8);
+
+    for (i = 0; i < 64; i++)
+        block[i] >>= 2;
+
+    for (i = 0; i < 8; i++)
+        idctSparseCol_10(block + i);
+}
diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h
index a33eb96..6e22158 100644
--- a/libavcodec/simple_idct.h
+++ b/libavcodec/simple_idct.h
@@ -38,6 +38,12 @@ void ff_simple_idct_8(DCTELEM *block);
 void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block);
 void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block);
 void ff_simple_idct_10(DCTELEM *block);
+/**
+ * Special version of ff_simple_idct_10() which does dequantization
+ * and scales by a factor of 2 more between the two IDCTs to account
+ * for larger scale of input coefficients.
+ */
+void ff_prores_idct(DCTELEM *block, const int16_t *qmat);
 
 void ff_simple_idct_mmx(int16_t *block);
 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
-- 
cgit v1.1


From 6aa3cac6bf561712086ae413a36b5f05087c8887 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sat, 8 Oct 2011 21:40:01 -0700
Subject: swscale: use aligned move for storage into temporary buffer.

The intermediate buffer is always aligned.
---
 libswscale/x86/scale.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index ebaab34..d355894 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -369,7 +369,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
     cvtps2dq      m0, m0
 %endif ; mmx/sse2/ssse3/sse4
 %ifnidn %3, X
-    movu [r1+r2*(4>>r2shr)], m0
+    mova [r1+r2*(4>>r2shr)], m0
 %else ; %3 == X
     movq   [r1+r2*4], m0
 %endif ; %3 ==/!= X
-- 
cgit v1.1


From e3f530feca80627e278e15fbe3b60cef7a6b630d Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Fri, 30 Sep 2011 14:37:11 +0200
Subject: prores: idct sse2/sse4 optimizations.

~3.0-3.5x as fast as original C version, 1.6x as fast overall.
---
 libavcodec/proresdec.c          |   4 +-
 libavcodec/proresdsp.c          |   2 +
 libavcodec/proresdsp.h          |   2 +
 libavcodec/x86/Makefile         |   2 +
 libavcodec/x86/dsputil_mmx.c    |   2 +
 libavcodec/x86/proresdsp-init.c |  54 +++++
 libavcodec/x86/proresdsp.asm    | 432 ++++++++++++++++++++++++++++++++++++++++
 7 files changed, 496 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/x86/proresdsp-init.c
 create mode 100644 libavcodec/x86/proresdsp.asm

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index cbbd068..3b539a1 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -545,8 +545,8 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
     if (ctx->qmat_changed || sf != ctx->prev_slice_sf) {
         ctx->prev_slice_sf = sf;
         for (i = 0; i < 64; i++) {
-            ctx->qmat_luma_scaled[i]   = ctx->qmat_luma[i]   * sf;
-            ctx->qmat_chroma_scaled[i] = ctx->qmat_chroma[i] * sf;
+            ctx->qmat_luma_scaled[ctx->dsp.idct_permutation[i]]   = ctx->qmat_luma[i]   * sf;
+            ctx->qmat_chroma_scaled[ctx->dsp.idct_permutation[i]] = ctx->qmat_chroma[i] * sf;
         }
     }
 
diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
index 3038176..7e753e9 100644
--- a/libavcodec/proresdsp.c
+++ b/libavcodec/proresdsp.c
@@ -56,6 +56,8 @@ void ff_proresdsp_init(ProresDSPContext *dsp)
     dsp->idct_put = prores_idct_put_c;
     dsp->idct_permutation_type = FF_NO_IDCT_PERM;
 
+    if (HAVE_MMX) ff_proresdsp_x86_init(dsp);
+
     ff_init_scantable_permutation(dsp->idct_permutation,
                                   dsp->idct_permutation_type);
 }
diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h
index 18d6bf5..8b864fa 100644
--- a/libavcodec/proresdsp.h
+++ b/libavcodec/proresdsp.h
@@ -35,4 +35,6 @@ typedef struct {
 
 void ff_proresdsp_init(ProresDSPContext *dsp);
 
+void ff_proresdsp_x86_init(ProresDSPContext *dsp);
+
 #endif /* AVCODEC_PRORESDSP_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index a94f97a..ab13109 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -33,6 +33,8 @@ MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
 MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
 MMX-OBJS-$(CONFIG_LPC)                 += x86/lpc_mmx.o
+YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
+MMX-OBJS-$(CONFIG_PRORES_DECODER)      += x86/proresdsp-init.o
 MMX-OBJS-$(CONFIG_DWT)                 += x86/snowdsp_mmx.o
 MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
 YASM-OBJS-$(CONFIG_VP3_DECODER)        += x86/vp3dsp.o
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 2fb75cb..58620d6 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -64,6 +64,8 @@ DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_64 ) = {0x0040004000400040ULL, 0x00400
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_512) = {0x0200020002000200ULL, 0x0200020002000200ULL};
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019)= {0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL};
 
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_0  ) = {0x0000000000000000ULL, 0x0000000000000000ULL};
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_1  ) = {0x0101010101010101ULL, 0x0101010101010101ULL};
diff --git a/libavcodec/x86/proresdsp-init.c b/libavcodec/x86/proresdsp-init.c
new file mode 100644
index 0000000..9760105
--- /dev/null
+++ b/libavcodec/x86/proresdsp-init.c
@@ -0,0 +1,54 @@
+/*
+ * Apple ProRes compatible decoder
+ *
+ * Copyright (c) 2010-2011 Maxim Poliakovski
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/proresdsp.h"
+
+void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
+                                DCTELEM *block, const int16_t *qmat);
+void ff_prores_idct_put_10_sse4(uint16_t *dst, int linesize,
+                                DCTELEM *block, const int16_t *qmat);
+void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
+                                DCTELEM *block, const int16_t *qmat);
+
+void ff_proresdsp_x86_init(ProresDSPContext *dsp)
+{
+#if ARCH_X86_64
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_SSE2) {
+        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+        dsp->idct_put = ff_prores_idct_put_10_sse2;
+    }
+
+    if (flags & AV_CPU_FLAG_SSE4) {
+        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+        dsp->idct_put = ff_prores_idct_put_10_sse4;
+    }
+
+#if HAVE_AVX
+    if (flags & AV_CPU_FLAG_AVX) {
+        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+        dsp->idct_put = ff_prores_idct_put_10_avx;
+    }
+#endif
+#endif
+}
diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm
new file mode 100644
index 0000000..9365bf1
--- /dev/null
+++ b/libavcodec/x86/proresdsp.asm
@@ -0,0 +1,432 @@
+;******************************************************************************
+;* x86-SIMD-optimized IDCT for prores
+;* this is identical to "simple" IDCT except for the clip range
+;*
+;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+%define W1sh2 22725 ; W1 = 90901 = 22725<<2 + 1
+%define W2sh2 21407 ; W2 = 85627 = 21407<<2 - 1
+%define W3sh2 19265 ; W3 = 77062 = 19265<<2 + 2
+%define W4sh2 16384 ; W4 = 65535 = 16384<<2 - 1
+%define W5sh2 12873 ; W5 = 51491 = 12873<<2 - 1
+%define W6sh2  8867 ; W6 = 35468 =  8867<<2
+%define W7sh2  4520 ; W7 = 18081 =  4520<<2 + 1
+
+%ifdef ARCH_X86_64
+
+SECTION_RODATA
+
+w4_plus_w2: times 4 dw W4sh2, +W2sh2
+w4_min_w2:  times 4 dw W4sh2, -W2sh2
+w4_plus_w6: times 4 dw W4sh2, +W6sh2
+w4_min_w6:  times 4 dw W4sh2, -W6sh2
+w1_plus_w3: times 4 dw W1sh2, +W3sh2
+w3_min_w1:  times 4 dw W3sh2, -W1sh2
+w7_plus_w3: times 4 dw W7sh2, +W3sh2
+w3_min_w7:  times 4 dw W3sh2, -W7sh2
+w1_plus_w5: times 4 dw W1sh2, +W5sh2
+w5_min_w1:  times 4 dw W5sh2, -W1sh2
+w5_plus_w7: times 4 dw W5sh2, +W7sh2
+w7_min_w5:  times 4 dw W7sh2, -W5sh2
+row_round:  times 8 dw (1<<14)
+
+cextern pw_4
+cextern pw_8
+cextern pw_512
+cextern pw_1019
+
+section .text align=16
+
+; interleave data while maintaining source
+; %1=type, %2=dstlo, %3=dsthi, %4=src, %5=interleave
+%macro SBUTTERFLY3 5
+    punpckl%1   m%2, m%4, m%5
+    punpckh%1   m%3, m%4, m%5
+%endmacro
+
+; %1/%2=src1/dst1, %3/%4=dst2, %5/%6=src2, %7=shift
+; action: %3/%4 = %1/%2 - %5/%6; %1/%2 += %5/%6
+;         %1/%2/%3/%4 >>= %7; dword -> word (in %1/%3)
+%macro SUMSUB_SHPK 7
+    psubd       %3,  %1,  %5       ; { a0 - b0 }[0-3]
+    psubd       %4,  %2,  %6       ; { a0 - b0 }[4-7]
+    paddd       %1,  %5            ; { a0 + b0 }[0-3]
+    paddd       %2,  %6            ; { a0 + b0 }[4-7]
+    psrad       %1,  %7
+    psrad       %2,  %7
+    psrad       %3,  %7
+    psrad       %4,  %7
+    packssdw    %1,  %2            ; row[0]
+    packssdw    %3,  %4            ; row[7]
+%endmacro
+
+; %1 = row or col (for rounding variable)
+; %2 = number of bits to shift at the end
+; %3 = optimization
+%macro IDCT_1D 3
+    ; a0 = (W4 * row[0]) + (1 << (15 - 1));
+    ; a1 = a0;
+    ; a2 = a0;
+    ; a3 = a0;
+    ; a0 += W2 * row[2];
+    ; a1 += W6 * row[2];
+    ; a2 -= W6 * row[2];
+    ; a3 -= W2 * row[2];
+%ifidn %1, col
+    paddw       m10,[pw_8]
+%endif
+    SBUTTERFLY3 wd,  0,  1, 10,  8 ; { row[0], row[2] }[0-3]/[4-7]
+%ifidn %1, row
+    psubw       m10,[row_round]
+%endif
+    SIGNEXTEND  m8,  m9,  m14      ; { row[2] }[0-3] / [4-7]
+    SIGNEXTEND  m10, m11, m14      ; { row[0] }[0-3] / [4-7]
+    pmaddwd     m2,  m0, [w4_plus_w6]
+    pmaddwd     m3,  m1, [w4_plus_w6]
+    pmaddwd     m4,  m0, [w4_min_w6]
+    pmaddwd     m5,  m1, [w4_min_w6]
+    pmaddwd     m6,  m0, [w4_min_w2]
+    pmaddwd     m7,  m1, [w4_min_w2]
+    pmaddwd     m0, [w4_plus_w2]
+    pmaddwd     m1, [w4_plus_w2]
+    pslld       m2,  2
+    pslld       m3,  2
+    pslld       m4,  2
+    pslld       m5,  2
+    pslld       m6,  2
+    pslld       m7,  2
+    pslld       m0,  2
+    pslld       m1,  2
+
+    ; a0: -1*row[0]-1*row[2]
+    ; a1: -1*row[0]
+    ; a2: -1*row[0]
+    ; a3: -1*row[0]+1*row[2]
+    psubd       m2,  m10           ; a1[0-3]
+    psubd       m3,  m11           ; a1[4-7]
+    psubd       m4,  m10           ; a2[0-3]
+    psubd       m5,  m11           ; a2[4-7]
+    psubd       m0,  m10
+    psubd       m1,  m11
+    psubd       m6,  m10
+    psubd       m7,  m11
+    psubd       m0,  m8            ; a0[0-3]
+    psubd       m1,  m9            ; a0[4-7]
+    paddd       m6,  m8            ; a3[0-3]
+    paddd       m7,  m9            ; a3[4-7]
+
+    ; a0 +=   W4*row[4] + W6*row[6]; i.e. -1*row[4]
+    ; a1 -=   W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6]
+    ; a2 -=   W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6]
+    ; a3 +=   W4*row[4] - W6*row[6]; i.e. -1*row[4]
+    SBUTTERFLY3 wd,  8,  9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7]
+    SIGNEXTEND  m13, m14, m10      ; { row[4] }[0-3] / [4-7]
+    pmaddwd     m10, m8, [w4_plus_w6]
+    pmaddwd     m11, m9, [w4_plus_w6]
+    pslld       m10, 2
+    pslld       m11, 2
+    psubd       m10,  m13
+    psubd       m11,  m14
+    paddd       m0,  m10            ; a0[0-3]
+    paddd       m1,  m11            ; a0[4-7]
+    pmaddwd     m10, m8, [w4_min_w6]
+    pmaddwd     m11, m9, [w4_min_w6]
+    pslld       m10, 2
+    pslld       m11, 2
+    psubd       m10, m13
+    psubd       m11, m14
+    paddd       m6,  m10           ; a3[0-3]
+    paddd       m7,  m11           ; a3[4-7]
+    pmaddwd     m10, m8, [w4_min_w2]
+    pmaddwd     m11, m9, [w4_min_w2]
+    pmaddwd     m8, [w4_plus_w2]
+    pmaddwd     m9, [w4_plus_w2]
+    pslld       m10, 2
+    pslld       m11, 2
+    pslld       m8,  2
+    pslld       m9,  2
+    psubd       m10, m13
+    psubd       m11, m14
+    psubd       m8,  m13
+    psubd       m9,  m14
+    psubd       m4,  m10           ; a2[0-3] intermediate
+    psubd       m5,  m11           ; a2[4-7] intermediate
+    psubd       m2,  m8            ; a1[0-3] intermediate
+    psubd       m3,  m9            ; a1[4-7] intermediate
+    SIGNEXTEND  m12, m13, m10      ; { row[6] }[0-3] / [4-7]
+    psubd       m4,  m12           ; a2[0-3]
+    psubd       m5,  m13           ; a2[4-7]
+    paddd       m2,  m12           ; a1[0-3]
+    paddd       m3,  m13           ; a1[4-7]
+
+    ; load/store
+    mova   [r2+  0], m0
+    mova   [r2+ 32], m2
+    mova   [r2+ 64], m4
+    mova   [r2+ 96], m6
+    mova        m10,[r2+ 16]       ; { row[1] }[0-7]
+    mova        m8, [r2+ 48]       ; { row[3] }[0-7]
+    mova        m13,[r2+ 80]       ; { row[5] }[0-7]
+    mova        m14,[r2+112]       ; { row[7] }[0-7]
+    mova   [r2+ 16], m1
+    mova   [r2+ 48], m3
+    mova   [r2+ 80], m5
+    mova   [r2+112], m7
+%ifidn %1, row
+    pmullw      m10,[r3+ 16]
+    pmullw      m8, [r3+ 48]
+    pmullw      m13,[r3+ 80]
+    pmullw      m14,[r3+112]
+%endif
+
+    ; b0 = MUL(W1, row[1]);
+    ; MAC(b0, W3, row[3]);
+    ; b1 = MUL(W3, row[1]);
+    ; MAC(b1, -W7, row[3]);
+    ; b2 = MUL(W5, row[1]);
+    ; MAC(b2, -W1, row[3]);
+    ; b3 = MUL(W7, row[1]);
+    ; MAC(b3, -W5, row[3]);
+    SBUTTERFLY3 wd,  0,  1, 10, 8  ; { row[1], row[3] }[0-3]/[4-7]
+    SIGNEXTEND  m10, m11, m12      ; { row[1] }[0-3] / [4-7]
+    SIGNEXTEND  m8,  m9,  m12      ; { row[3] }[0-3] / [4-7]
+    pmaddwd     m2,  m0, [w3_min_w7]
+    pmaddwd     m3,  m1, [w3_min_w7]
+    pmaddwd     m4,  m0, [w5_min_w1]
+    pmaddwd     m5,  m1, [w5_min_w1]
+    pmaddwd     m6,  m0, [w7_min_w5]
+    pmaddwd     m7,  m1, [w7_min_w5]
+    pmaddwd     m0, [w1_plus_w3]
+    pmaddwd     m1, [w1_plus_w3]
+    pslld       m2,  2
+    pslld       m3,  2
+    pslld       m4,  2
+    pslld       m5,  2
+    pslld       m6,  2
+    pslld       m7,  2
+    pslld       m0,  2
+    pslld       m1,  2
+
+    ; b0: +1*row[1]+2*row[3]
+    ; b1: +2*row[1]-1*row[3]
+    ; b2: -1*row[1]-1*row[3]
+    ; b3: +1*row[1]+1*row[3]
+    psubd       m2,  m8
+    psubd       m3,  m9
+    paddd       m0,  m8
+    paddd       m1,  m9
+    paddd       m8,  m10           ; { row[1] + row[3] }[0-3]
+    paddd       m9,  m11           ; { row[1] + row[3] }[4-7]
+    paddd       m10, m10
+    paddd       m11, m11
+    paddd       m0,  m8            ; b0[0-3]
+    paddd       m1,  m9            ; b0[4-7]
+    paddd       m2,  m10           ; b1[0-3]
+    paddd       m3,  m11           ; b1[4-7]
+    psubd       m4,  m8            ; b2[0-3]
+    psubd       m5,  m9            ; b2[4-7]
+    paddd       m6,  m8            ; b3[0-3]
+    paddd       m7,  m9            ; b3[4-7]
+
+    ; MAC(b0,  W5, row[5]);
+    ; MAC(b0,  W7, row[7]);
+    ; MAC(b1, -W1, row[5]);
+    ; MAC(b1, -W5, row[7]);
+    ; MAC(b2,  W7, row[5]);
+    ; MAC(b2,  W3, row[7]);
+    ; MAC(b3,  W3, row[5]);
+    ; MAC(b3, -W1, row[7]);
+    SBUTTERFLY3 wd,  8,  9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7]
+    SIGNEXTEND  m13, m12, m11      ; { row[5] }[0-3] / [4-7]
+    SIGNEXTEND  m14, m11, m10      ; { row[7] }[0-3] / [4-7]
+
+    ; b0: -1*row[5]+1*row[7]
+    ; b1: -1*row[5]+1*row[7]
+    ; b2: +1*row[5]+2*row[7]
+    ; b3: +2*row[5]-1*row[7]
+    paddd       m4,  m13
+    paddd       m5,  m12
+    paddd       m6,  m13
+    paddd       m7,  m12
+    psubd       m13, m14           ; { row[5] - row[7] }[0-3]
+    psubd       m12, m11           ; { row[5] - row[7] }[4-7]
+    paddd       m14, m14
+    paddd       m11, m11
+    psubd       m0,  m13
+    psubd       m1,  m12
+    psubd       m2,  m13
+    psubd       m3,  m12
+    paddd       m4,  m14
+    paddd       m5,  m11
+    paddd       m6,  m13
+    paddd       m7,  m12
+
+    pmaddwd     m10, m8, [w1_plus_w5]
+    pmaddwd     m11, m9, [w1_plus_w5]
+    pmaddwd     m12, m8, [w5_plus_w7]
+    pmaddwd     m13, m9, [w5_plus_w7]
+    pslld       m10, 2
+    pslld       m11, 2
+    pslld       m12,  2
+    pslld       m13,  2
+    psubd       m2,  m10           ; b1[0-3]
+    psubd       m3,  m11           ; b1[4-7]
+    paddd       m0,  m12            ; b0[0-3]
+    paddd       m1,  m13            ; b0[4-7]
+    pmaddwd     m12, m8, [w7_plus_w3]
+    pmaddwd     m13, m9, [w7_plus_w3]
+    pmaddwd     m8, [w3_min_w1]
+    pmaddwd     m9, [w3_min_w1]
+    pslld       m12, 2
+    pslld       m13, 2
+    pslld       m8,  2
+    pslld       m9,  2
+    paddd       m4,  m12           ; b2[0-3]
+    paddd       m5,  m13           ; b2[4-7]
+    paddd       m6,  m8            ; b3[0-3]
+    paddd       m7,  m9            ; b3[4-7]
+
+    ; row[0] = (a0 + b0) >> 15;
+    ; row[7] = (a0 - b0) >> 15;
+    ; row[1] = (a1 + b1) >> 15;
+    ; row[6] = (a1 - b1) >> 15;
+    ; row[2] = (a2 + b2) >> 15;
+    ; row[5] = (a2 - b2) >> 15;
+    ; row[3] = (a3 + b3) >> 15;
+    ; row[4] = (a3 - b3) >> 15;
+    mova        m8, [r2+ 0]        ; a0[0-3]
+    mova        m9, [r2+16]        ; a0[4-7]
+    SUMSUB_SHPK m8,  m9,  m10, m11, m0,  m1,  %2
+    mova        m0, [r2+32]        ; a1[0-3]
+    mova        m1, [r2+48]        ; a1[4-7]
+    SUMSUB_SHPK m0,  m1,  m9,  m11, m2,  m3,  %2
+    mova        m1, [r2+64]        ; a2[0-3]
+    mova        m2, [r2+80]        ; a2[4-7]
+    SUMSUB_SHPK m1,  m2,  m11, m3,  m4,  m5,  %2
+    mova        m2, [r2+96]        ; a3[0-3]
+    mova        m3, [r2+112]       ; a3[4-7]
+    SUMSUB_SHPK m2,  m3,  m4,  m5,  m6,  m7,  %2
+%endmacro
+
+; void prores_idct_put_10_<opt>(uint16_t *pixels, int stride,
+;                               DCTELEM *block, const int16_t *qmat);
+%macro idct_put_fn 2
+cglobal prores_idct_put_10_%1, 4, 4, %2
+    movsxd      r1,  r1d
+    pxor        m15, m15           ; zero
+
+    ; for (i = 0; i < 8; i++)
+    ;     idctRowCondDC(block + i*8);
+    mova        m10,[r2+ 0]        ; { row[0] }[0-7]
+    mova        m8, [r2+32]        ; { row[2] }[0-7]
+    mova        m13,[r2+64]        ; { row[4] }[0-7]
+    mova        m12,[r2+96]        ; { row[6] }[0-7]
+
+    pmullw      m10,[r3+ 0]
+    pmullw      m8, [r3+32]
+    pmullw      m13,[r3+64]
+    pmullw      m12,[r3+96]
+
+    IDCT_1D     row, 17,  %1
+
+    ; transpose for second part of IDCT
+    TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
+    mova   [r2+ 16], m0
+    mova   [r2+ 48], m2
+    mova   [r2+ 80], m11
+    mova   [r2+112], m10
+    SWAP         8,  10
+    SWAP         1,   8
+    SWAP         4,  13
+    SWAP         9,  12
+
+    ; for (i = 0; i < 8; i++)
+    ;     idctSparseColAdd(dest + i, line_size, block + i);
+    IDCT_1D     col, 20,  %1
+
+    ; clip/store
+    mova        m6, [pw_512]
+    mova        m3, [pw_4]
+    mova        m5, [pw_1019]
+    paddw       m8,  m6
+    paddw       m0,  m6
+    paddw       m1,  m6
+    paddw       m2,  m6
+    paddw       m4,  m6
+    paddw       m11, m6
+    paddw       m9,  m6
+    paddw       m10, m6
+    pmaxsw      m8,  m3
+    pmaxsw      m0,  m3
+    pmaxsw      m1,  m3
+    pmaxsw      m2,  m3
+    pmaxsw      m4,  m3
+    pmaxsw      m11, m3
+    pmaxsw      m9,  m3
+    pmaxsw      m10, m3
+    pminsw      m8,  m5
+    pminsw      m0,  m5
+    pminsw      m1,  m5
+    pminsw      m2,  m5
+    pminsw      m4,  m5
+    pminsw      m11, m5
+    pminsw      m9,  m5
+    pminsw      m10, m5
+
+    lea         r2, [r1*3]
+    mova  [r0     ], m8
+    mova  [r0+r1  ], m0
+    mova  [r0+r1*2], m1
+    mova  [r0+r2  ], m2
+    lea         r0, [r0+r1*4]
+    mova  [r0     ], m4
+    mova  [r0+r1  ], m11
+    mova  [r0+r1*2], m9
+    mova  [r0+r2  ], m10
+    RET
+%endmacro
+
+%macro signextend_sse2 3 ; dstlow, dsthigh, tmp
+    pxor        %3,  %3
+    pcmpgtw     %3,  %1
+    mova        %2,  %1
+    punpcklwd   %1,  %3
+    punpckhwd   %2,  %3
+%endmacro
+
+%macro signextend_sse4 2-3 ; dstlow, dsthigh
+    movhlps     %2,  %1
+    pmovsxwd    %1,  %1
+    pmovsxwd    %2,  %2
+%endmacro
+
+INIT_XMM
+%define SIGNEXTEND signextend_sse2
+idct_put_fn sse2, 16
+INIT_XMM
+%define SIGNEXTEND signextend_sse4
+idct_put_fn sse4, 16
+INIT_AVX
+idct_put_fn avx,  16
+
+%endif
-- 
cgit v1.1


From 91f4732913f657f426f2707c45b9026e12b04eb2 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 6 Oct 2011 07:17:44 -0700
Subject: prores: implement multithreading.

60% speedup (overall decoding time) at 2 threads, and another 60%
speedup at 4 threads.
---
 libavcodec/proresdec.c | 71 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 27 deletions(-)

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index 3b539a1..83c083f 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -38,6 +38,14 @@
 #include "get_bits.h"
 
 typedef struct {
+    const uint8_t *index;            ///< pointer to the data of this slice
+    int slice_num;
+    int x_pos, y_pos;
+    int slice_width;
+    DECLARE_ALIGNED(16, DCTELEM, blocks[8 * 4 * 64]);
+} ProresThreadData;
+
+typedef struct {
     ProresDSPContext dsp;
     AVFrame    picture;
     ScanTable  scantable;
@@ -51,9 +59,9 @@ typedef struct {
     int        prev_slice_sf;            ///< scalefactor of the previous decoded slice
     DECLARE_ALIGNED(16, int16_t, qmat_luma_scaled[64]);
     DECLARE_ALIGNED(16, int16_t, qmat_chroma_scaled[64]);
-    DECLARE_ALIGNED(16, DCTELEM, blocks[8 * 4 * 64]);
     int        total_slices;            ///< total number of slices in a picture
-    const uint8_t **slice_data_index;   ///< array of pointers to the data of each slice
+    ProresThreadData *slice_data;
+    int        pic_num;
     int        chroma_factor;
     int        mb_chroma_factor;
     int        num_chroma_blocks;       ///< number of chrominance blocks in a macroblock
@@ -94,7 +102,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     ProresContext *ctx = avctx->priv_data;
 
     ctx->total_slices     = 0;
-    ctx->slice_data_index = 0;
+    ctx->slice_data       = NULL;
 
     avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
 
@@ -265,9 +273,9 @@ static int decode_picture_header(ProresContext *ctx, const uint8_t *buf,
     }
 
     if (ctx->total_slices != num_slices) {
-        av_freep(&ctx->slice_data_index);
-        ctx->slice_data_index = av_malloc((num_slices + 1) * sizeof(uint8_t*));
-        if (!ctx->slice_data_index)
+        av_freep(&ctx->slice_data);
+        ctx->slice_data = av_malloc((num_slices + 1) * sizeof(ctx->slice_data[0]));
+        if (!ctx->slice_data)
             return AVERROR(ENOMEM);
         ctx->total_slices = num_slices;
     }
@@ -282,10 +290,10 @@ static int decode_picture_header(ProresContext *ctx, const uint8_t *buf,
     data_ptr = index_ptr + num_slices * 2;
 
     for (i = 0; i < num_slices; i++) {
-        ctx->slice_data_index[i] = data_ptr;
+        ctx->slice_data[i].index = data_ptr;
         data_ptr += AV_RB16(index_ptr + i * 2);
     }
-    ctx->slice_data_index[i] = data_ptr;
+    ctx->slice_data[i].index = data_ptr;
 
     if (data_ptr > buf + data_size) {
         av_log(avctx, AV_LOG_ERROR, "out of slice data\n");
@@ -446,7 +454,8 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
 /**
  * Decode a slice plane (luma or chroma).
  */
-static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
+static void decode_slice_plane(ProresContext *ctx, ProresThreadData *td,
+                               const uint8_t *buf,
                                int data_size, uint16_t *out_ptr,
                                int linesize, int mbs_per_slice,
                                int blocks_per_mb, int plane_size_factor,
@@ -458,17 +467,17 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
 
     blocks_per_slice = mbs_per_slice * blocks_per_mb;
 
-    memset(ctx->blocks, 0, 8 * 4 * 64 * sizeof(*ctx->blocks));
+    memset(td->blocks, 0, 8 * 4 * 64 * sizeof(*td->blocks));
 
     init_get_bits(&gb, buf, data_size << 3);
 
-    decode_dc_coeffs(&gb, ctx->blocks, blocks_per_slice);
+    decode_dc_coeffs(&gb, td->blocks, blocks_per_slice);
 
-    decode_ac_coeffs(&gb, ctx->blocks, blocks_per_slice,
+    decode_ac_coeffs(&gb, td->blocks, blocks_per_slice,
                      plane_size_factor, ctx->scantable.permutated);
 
     /* inverse quantization, inverse transform and output */
-    block_ptr = ctx->blocks;
+    block_ptr = td->blocks;
 
     for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
         ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr, qmat);
@@ -487,10 +496,14 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
 }
 
 
-static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
-                        int mb_x_pos, int mb_y_pos, int mbs_per_slice,
-                        AVCodecContext *avctx)
+static int decode_slice(AVCodecContext *avctx, ProresThreadData *td)
 {
+    ProresContext *ctx = avctx->priv_data;
+    int mb_x_pos  = td->x_pos;
+    int mb_y_pos  = td->y_pos;
+    int pic_num   = ctx->pic_num;
+    int slice_num = td->slice_num;
+    int mbs_per_slice = td->slice_width;
     const uint8_t *buf;
     uint8_t *y_data, *u_data, *v_data;
     AVFrame *pic = avctx->coded_frame;
@@ -498,8 +511,8 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
     int slice_data_size, hdr_size, y_data_size, u_data_size, v_data_size;
     int y_linesize, u_linesize, v_linesize;
 
-    buf             = ctx->slice_data_index[slice_num];
-    slice_data_size = ctx->slice_data_index[slice_num + 1] - buf;
+    buf             = ctx->slice_data[slice_num].index;
+    slice_data_size = ctx->slice_data[slice_num + 1].index - buf;
 
     slice_width_factor = av_log2(mbs_per_slice);
 
@@ -551,14 +564,14 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
     }
 
     /* decode luma plane */
-    decode_slice_plane(ctx, buf + hdr_size, y_data_size,
+    decode_slice_plane(ctx, td, buf + hdr_size, y_data_size,
                        (uint16_t*) (y_data + (mb_y_pos << 4) * y_linesize +
                                     (mb_x_pos << 5)), y_linesize,
                        mbs_per_slice, 4, slice_width_factor + 2,
                        ctx->qmat_luma_scaled);
 
     /* decode U chroma plane */
-    decode_slice_plane(ctx, buf + hdr_size + y_data_size, u_data_size,
+    decode_slice_plane(ctx, td, buf + hdr_size + y_data_size, u_data_size,
                        (uint16_t*) (u_data + (mb_y_pos << 4) * u_linesize +
                                     (mb_x_pos << ctx->mb_chroma_factor)),
                        u_linesize, mbs_per_slice, ctx->num_chroma_blocks,
@@ -566,7 +579,7 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
                        ctx->qmat_chroma_scaled);
 
     /* decode V chroma plane */
-    decode_slice_plane(ctx, buf + hdr_size + y_data_size + u_data_size,
+    decode_slice_plane(ctx, td, buf + hdr_size + y_data_size + u_data_size,
                        v_data_size,
                        (uint16_t*) (v_data + (mb_y_pos << 4) * v_linesize +
                                     (mb_x_pos << ctx->mb_chroma_factor)),
@@ -585,6 +598,7 @@ static int decode_picture(ProresContext *ctx, int pic_num,
 
     slice_num = 0;
 
+    ctx->pic_num = pic_num;
     for (y_pos = 0; y_pos < ctx->num_y_mbs; y_pos++) {
         slice_width = 1 << ctx->slice_width_factor;
 
@@ -593,15 +607,18 @@ static int decode_picture(ProresContext *ctx, int pic_num,
             while (ctx->num_x_mbs - x_pos < slice_width)
                 slice_width >>= 1;
 
-            if (decode_slice(ctx, pic_num, slice_num, x_pos, y_pos,
-                             slice_width, avctx) < 0)
-                return -1;
+            ctx->slice_data[slice_num].slice_num   = slice_num;
+            ctx->slice_data[slice_num].x_pos       = x_pos;
+            ctx->slice_data[slice_num].y_pos       = y_pos;
+            ctx->slice_data[slice_num].slice_width = slice_width;
 
             slice_num++;
         }
     }
 
-    return 0;
+    return avctx->execute(avctx, (void *) decode_slice,
+                          ctx->slice_data, NULL, slice_num,
+                          sizeof(ctx->slice_data[0]));
 }
 
 
@@ -664,7 +681,7 @@ static av_cold int decode_close(AVCodecContext *avctx)
     if (ctx->picture.data[0])
         avctx->release_buffer(avctx, &ctx->picture);
 
-    av_freep(&ctx->slice_data_index);
+    av_freep(&ctx->slice_data);
 
     return 0;
 }
@@ -678,6 +695,6 @@ AVCodec ff_prores_decoder = {
     .init           = decode_init,
     .close          = decode_close,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)")
 };
-- 
cgit v1.1


From fdba370f8a1bdfc22ecbdf3c7148c2f8680a4ac4 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Tue, 11 Oct 2011 16:00:21 +0100
Subject: h264: fix HRD parameters parsing

The bit_rate_value_minus1 and cpb_size_value_minus1 elements
allow a wider range than get_ue_golomb() supports.  This
adds a get_ue_golomb_long() function supporting up to 31
leading zeros, which is the maximum for these syntax
elements, and uses it in decode_hrd_parameters().

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/golomb.h  | 14 ++++++++++++++
 libavcodec/h264_ps.c |  4 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h
index 83d277f..503aa14 100644
--- a/libavcodec/golomb.h
+++ b/libavcodec/golomb.h
@@ -75,6 +75,20 @@ static inline int get_ue_golomb(GetBitContext *gb){
     }
 }
 
+/**
+ * Read an unsigned Exp-Golomb code in the range 0 to UINT32_MAX-1.
+ */
+static inline unsigned get_ue_golomb_long(GetBitContext *gb)
+{
+    unsigned buf, log;
+
+    buf = show_bits_long(gb, 32);
+    log = 31 - av_log2(buf);
+    skip_bits_long(gb, log);
+
+    return get_bits_long(gb, log + 1) - 1;
+}
+
  /**
  * read unsigned exp golomb code, constraint to a max of 31.
  * the return value is undefined if the stored value exceeds 31.
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index bb673e9..677ca80 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -130,8 +130,8 @@ static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
     get_bits(&s->gb, 4); /* bit_rate_scale */
     get_bits(&s->gb, 4); /* cpb_size_scale */
     for(i=0; i<cpb_count; i++){
-        get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
-        get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
+        get_ue_golomb_long(&s->gb); /* bit_rate_value_minus1 */
+        get_ue_golomb_long(&s->gb); /* cpb_size_value_minus1 */
         get_bits1(&s->gb);     /* cbr_flag */
     }
     sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
-- 
cgit v1.1


From 5f3fb599536dd5bceb1d45cb73cd0b0ce3e5560c Mon Sep 17 00:00:00 2001
From: John Stebbins <stebbins@jetheaddev.com>
Date: Tue, 11 Oct 2011 14:57:58 -0400
Subject: fix AC3ENC_OPT_MODE_ON/OFF

The values were reversed.

Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
---
 libavcodec/ac3enc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index bf5ccea..4a01749 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -73,8 +73,8 @@ typedef int64_t CoefSumType;
 #define AC3ENC_OPT_OFF              0
 #define AC3ENC_OPT_ON               1
 #define AC3ENC_OPT_NOT_INDICATED    0
-#define AC3ENC_OPT_MODE_ON          1
-#define AC3ENC_OPT_MODE_OFF         2
+#define AC3ENC_OPT_MODE_ON          2
+#define AC3ENC_OPT_MODE_OFF         1
 
 /* specific option values */
 #define AC3ENC_OPT_LARGE_ROOM       1
-- 
cgit v1.1